blob: 59a27ab1401553bf1d499d0982ef8f0a0ab0f024 [file] [log] [blame]
Matt Arsenault7836f892016-01-20 21:22:21 +00001//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
Tom Stellard75aadc22012-12-11 21:25:42 +00002//
Chandler Carruth2946cd72019-01-19 08:50:56 +00003// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Tom Stellard75aadc22012-12-11 21:25:42 +00006//
7//==-----------------------------------------------------------------------===//
8//
9/// \file
Adrian Prantl5f8f34e42018-05-01 15:54:18 +000010/// Defines an instruction selector for the AMDGPU target.
Tom Stellard75aadc22012-12-11 21:25:42 +000011//
12//===----------------------------------------------------------------------===//
Matt Arsenault592d0682015-12-01 23:04:05 +000013
Eugene Zelenko2bc2f332016-12-09 22:06:55 +000014#include "AMDGPU.h"
Matt Arsenault7016f132017-08-03 22:30:46 +000015#include "AMDGPUArgumentUsageInfo.h"
Chandler Carruth6bda14b2017-06-06 11:49:48 +000016#include "AMDGPUISelLowering.h" // For AMDGPUISD
Tom Stellard75aadc22012-12-11 21:25:42 +000017#include "AMDGPUInstrInfo.h"
Stanislav Mekhanoshin1c538422018-05-25 17:25:12 +000018#include "AMDGPUPerfHintAnalysis.h"
Eugene Zelenko2bc2f332016-12-09 22:06:55 +000019#include "AMDGPURegisterInfo.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000020#include "AMDGPUSubtarget.h"
Matt Arsenaultcc852232017-10-10 20:22:07 +000021#include "AMDGPUTargetMachine.h"
Eugene Zelenko2bc2f332016-12-09 22:06:55 +000022#include "SIDefines.h"
Christian Konigf82901a2013-02-26 17:52:23 +000023#include "SIISelLowering.h"
Chandler Carruth6bda14b2017-06-06 11:49:48 +000024#include "SIInstrInfo.h"
Tom Stellardb02094e2014-07-21 15:45:01 +000025#include "SIMachineFunctionInfo.h"
Chandler Carruth6bda14b2017-06-06 11:49:48 +000026#include "SIRegisterInfo.h"
Tom Stellard44b30b42018-05-22 02:03:23 +000027#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
Eugene Zelenko2bc2f332016-12-09 22:06:55 +000028#include "llvm/ADT/APInt.h"
29#include "llvm/ADT/SmallVector.h"
30#include "llvm/ADT/StringRef.h"
Nicolai Haehnle35617ed2018-08-30 14:21:36 +000031#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
Jan Veselyf97de002016-05-13 20:39:29 +000032#include "llvm/Analysis/ValueTracking.h"
Tom Stellard58ac7442014-04-29 23:12:48 +000033#include "llvm/CodeGen/FunctionLoweringInfo.h"
Eugene Zelenko2bc2f332016-12-09 22:06:55 +000034#include "llvm/CodeGen/ISDOpcodes.h"
35#include "llvm/CodeGen/MachineFunction.h"
36#include "llvm/CodeGen/MachineRegisterInfo.h"
Benjamin Kramerd78bb462013-05-23 17:10:37 +000037#include "llvm/CodeGen/SelectionDAG.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000038#include "llvm/CodeGen/SelectionDAGISel.h"
Eugene Zelenko2bc2f332016-12-09 22:06:55 +000039#include "llvm/CodeGen/SelectionDAGNodes.h"
Craig Topper2fa14362018-03-29 17:21:10 +000040#include "llvm/CodeGen/ValueTypes.h"
Eugene Zelenko2bc2f332016-12-09 22:06:55 +000041#include "llvm/IR/BasicBlock.h"
42#include "llvm/IR/Instruction.h"
43#include "llvm/MC/MCInstrDesc.h"
44#include "llvm/Support/Casting.h"
45#include "llvm/Support/CodeGen.h"
46#include "llvm/Support/ErrorHandling.h"
David Blaikie13e77db2018-03-23 23:58:25 +000047#include "llvm/Support/MachineValueType.h"
Eugene Zelenko2bc2f332016-12-09 22:06:55 +000048#include "llvm/Support/MathExtras.h"
49#include <cassert>
50#include <cstdint>
51#include <new>
52#include <vector>
Tom Stellard75aadc22012-12-11 21:25:42 +000053
Matt Arsenaulte8c03a22019-03-08 20:58:11 +000054#define DEBUG_TYPE "isel"
55
Tom Stellard75aadc22012-12-11 21:25:42 +000056using namespace llvm;
57
// Forward declaration only; this file does not need the full definition here.
namespace llvm {
class R600InstrInfo;
} // end namespace llvm
Matt Arsenaultd2759212016-02-13 01:24:08 +000063
Tom Stellard75aadc22012-12-11 21:25:42 +000064//===----------------------------------------------------------------------===//
65// Instruction Selector Implementation
66//===----------------------------------------------------------------------===//
67
68namespace {
Tom Stellardbc4497b2016-02-12 23:45:29 +000069
Tom Stellard75aadc22012-12-11 21:25:42 +000070/// AMDGPU specific code to select AMDGPU machine instructions for
71/// SelectionDAG operations.
72class AMDGPUDAGToDAGISel : public SelectionDAGISel {
73 // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
74 // make the right decision when generating code for different targets.
Tom Stellard5bfbae52018-07-11 20:59:01 +000075 const GCNSubtarget *Subtarget;
Matt Arsenaultcc852232017-10-10 20:22:07 +000076 bool EnableLateStructurizeCFG;
NAKAMURA Takumia9cb5382015-09-22 11:14:39 +000077
Tom Stellard75aadc22012-12-11 21:25:42 +000078public:
Matt Arsenault7016f132017-08-03 22:30:46 +000079 explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
80 CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
81 : SelectionDAGISel(*TM, OptLevel) {
Matt Arsenaultcc852232017-10-10 20:22:07 +000082 EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
Yaxun Liu1a14bfa2017-03-27 14:04:01 +000083 }
Eugene Zelenko2bc2f332016-12-09 22:06:55 +000084 ~AMDGPUDAGToDAGISel() override = default;
Konstantin Zhuravlyov60a83732016-10-03 18:47:26 +000085
Matt Arsenault7016f132017-08-03 22:30:46 +000086 void getAnalysisUsage(AnalysisUsage &AU) const override {
87 AU.addRequired<AMDGPUArgumentUsageInfo>();
Stanislav Mekhanoshin1c538422018-05-25 17:25:12 +000088 AU.addRequired<AMDGPUPerfHintAnalysis>();
Nicolai Haehnle35617ed2018-08-30 14:21:36 +000089 AU.addRequired<LegacyDivergenceAnalysis>();
Matt Arsenault7016f132017-08-03 22:30:46 +000090 SelectionDAGISel::getAnalysisUsage(AU);
91 }
92
Matt Arsenaulte8c03a22019-03-08 20:58:11 +000093 bool matchLoadD16FromBuildVector(SDNode *N) const;
94
Eric Christopher7792e322015-01-30 23:24:40 +000095 bool runOnMachineFunction(MachineFunction &MF) override;
Matt Arsenaulte8c03a22019-03-08 20:58:11 +000096 void PreprocessISelDAG() override;
Justin Bogner95927c02016-05-12 21:03:32 +000097 void Select(SDNode *N) override;
Mehdi Amini117296c2016-10-01 02:56:57 +000098 StringRef getPassName() const override;
Craig Topper5656db42014-04-29 07:57:24 +000099 void PostprocessISelDAG() override;
Tom Stellard75aadc22012-12-11 21:25:42 +0000100
Tom Stellard20287692017-08-08 04:57:55 +0000101protected:
102 void SelectBuildVector(SDNode *N, unsigned RegClassID);
103
Tom Stellard75aadc22012-12-11 21:25:42 +0000104private:
Matt Arsenault156d3ae2017-05-17 21:02:58 +0000105 std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
Matt Arsenaultf84e5d92017-01-31 03:07:46 +0000106 bool isNoNanSrc(SDValue N) const;
Matt Arsenaultfe267752016-07-28 00:32:02 +0000107 bool isInlineImmediate(const SDNode *N) const;
Alexander Timofeevdb7ee762018-09-11 11:56:50 +0000108 bool isVGPRImm(const SDNode *N) const;
Alexander Timofeev4d302f62018-09-13 09:06:56 +0000109 bool isUniformLoad(const SDNode *N) const;
Tom Stellardbc4497b2016-02-12 23:45:29 +0000110 bool isUniformBr(const SDNode *N) const;
111
Tim Renouff1c7b922018-08-02 22:53:57 +0000112 MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;
113
Matt Arsenaultcdd191d2019-01-28 20:14:49 +0000114 SDNode *glueCopyToM0LDSInit(SDNode *N) const;
115 SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;
Tom Stellard381a94a2015-05-12 15:00:49 +0000116
Tom Stellarddf94dc32013-08-14 23:24:24 +0000117 const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
Tom Stellard20287692017-08-08 04:57:55 +0000118 virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
119 virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
Matt Arsenaultcdd191d2019-01-28 20:14:49 +0000120 bool isDSOffsetLegal(SDValue Base, unsigned Offset,
Tom Stellard85e8b6d2014-08-22 18:49:33 +0000121 unsigned OffsetBits) const;
122 bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
Tom Stellardf3fc5552014-08-22 18:49:35 +0000123 bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
124 SDValue &Offset1) const;
Changpeng Fangb41574a2015-12-22 20:55:23 +0000125 bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
Tom Stellard155bbb72014-08-11 22:18:17 +0000126 SDValue &SOffset, SDValue &Offset, SDValue &Offen,
127 SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
Stanislav Mekhanoshina6322942019-04-30 22:08:23 +0000128 SDValue &TFE, SDValue &DLC) const;
Tom Stellard155bbb72014-08-11 22:18:17 +0000129 bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
Tom Stellard1f9939f2015-02-27 14:59:41 +0000130 SDValue &SOffset, SDValue &Offset, SDValue &GLC,
Stanislav Mekhanoshina6322942019-04-30 22:08:23 +0000131 SDValue &SLC, SDValue &TFE, SDValue &DLC) const;
Tom Stellard7980fc82014-09-25 18:30:26 +0000132 bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
Tom Stellardc53861a2015-02-11 00:34:32 +0000133 SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
Tom Stellard7980fc82014-09-25 18:30:26 +0000134 SDValue &SLC) const;
Matt Arsenaultb81495d2017-09-20 05:01:53 +0000135 bool SelectMUBUFScratchOffen(SDNode *Parent,
Matt Arsenault156d3ae2017-05-17 21:02:58 +0000136 SDValue Addr, SDValue &RSrc, SDValue &VAddr,
Matt Arsenault0774ea22017-04-24 19:40:59 +0000137 SDValue &SOffset, SDValue &ImmOffset) const;
Matt Arsenaultb81495d2017-09-20 05:01:53 +0000138 bool SelectMUBUFScratchOffset(SDNode *Parent,
Matt Arsenault156d3ae2017-05-17 21:02:58 +0000139 SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
Matt Arsenault0774ea22017-04-24 19:40:59 +0000140 SDValue &Offset) const;
141
Tom Stellard155bbb72014-08-11 22:18:17 +0000142 bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
143 SDValue &Offset, SDValue &GLC, SDValue &SLC,
Stanislav Mekhanoshina6322942019-04-30 22:08:23 +0000144 SDValue &TFE, SDValue &DLC) const;
Tom Stellard7980fc82014-09-25 18:30:26 +0000145 bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
Matt Arsenault88701812016-06-09 23:42:48 +0000146 SDValue &Offset, SDValue &SLC) const;
Jan Vesely43b7b5b2016-04-07 19:23:11 +0000147 bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
148 SDValue &Offset) const;
Matt Arsenault7757c592016-06-09 23:42:54 +0000149
Stanislav Mekhanoshina6322942019-04-30 22:08:23 +0000150 bool SelectFlatAtomic(SDNode *N, SDValue Addr, SDValue &VAddr,
Matt Arsenaultdb7c6a82017-06-12 16:53:51 +0000151 SDValue &Offset, SDValue &SLC) const;
Stanislav Mekhanoshina6322942019-04-30 22:08:23 +0000152 bool SelectFlatAtomicSigned(SDNode *N, SDValue Addr, SDValue &VAddr,
Matt Arsenault4e309b02017-07-29 01:03:53 +0000153 SDValue &Offset, SDValue &SLC) const;
154
155 template <bool IsSigned>
Stanislav Mekhanoshina6322942019-04-30 22:08:23 +0000156 bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
Matt Arsenaultdb7c6a82017-06-12 16:53:51 +0000157 SDValue &Offset, SDValue &SLC) const;
Matt Arsenault7757c592016-06-09 23:42:54 +0000158
Tom Stellarddee26a22015-08-06 19:28:30 +0000159 bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
160 bool &Imm) const;
Matt Arsenault923712b2018-02-09 16:57:57 +0000161 SDValue Expand32BitAddress(SDValue Addr) const;
Tom Stellarddee26a22015-08-06 19:28:30 +0000162 bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
163 bool &Imm) const;
164 bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
Marek Olsak8973a0a2017-05-24 14:53:50 +0000165 bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
Tom Stellarddee26a22015-08-06 19:28:30 +0000166 bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
167 bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
Marek Olsak8973a0a2017-05-24 14:53:50 +0000168 bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
Nicolai Haehnle7968c342016-07-12 08:12:16 +0000169 bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
Matt Arsenaultf84e5d92017-01-31 03:07:46 +0000170
171 bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
Matt Arsenaultd7e23032017-09-07 18:05:07 +0000172 bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
Tom Stellardb4a313a2014-08-01 00:32:39 +0000173 bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
Matt Arsenaultdf58e822017-04-25 21:17:38 +0000174 bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
Tom Stellardb4a313a2014-08-01 00:32:39 +0000175 bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
176 SDValue &Clamp, SDValue &Omod) const;
Tom Stellarddb5a11f2015-07-13 15:47:57 +0000177 bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
178 SDValue &Clamp, SDValue &Omod) const;
Tom Stellard75aadc22012-12-11 21:25:42 +0000179
Matt Arsenault4831ce52015-01-06 23:00:37 +0000180 bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
181 SDValue &Clamp,
182 SDValue &Omod) const;
Matt Arsenault1cffa4c2014-11-13 19:49:04 +0000183
Dmitry Preobrazhenskyc512d442017-03-27 15:57:17 +0000184 bool SelectVOP3OMods(SDValue In, SDValue &Src,
185 SDValue &Clamp, SDValue &Omod) const;
186
Matt Arsenaulteb522e62017-02-27 22:15:25 +0000187 bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
188 bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
189 SDValue &Clamp) const;
190
Dmitry Preobrazhenskyabf28392017-07-21 13:54:11 +0000191 bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
192 bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods,
193 SDValue &Clamp) const;
194
195 bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
196 bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
197 SDValue &Clamp) const;
Matt Arsenaultd7e23032017-09-07 18:05:07 +0000198 bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
Matt Arsenault76935122017-09-20 20:28:39 +0000199 bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
Dmitry Preobrazhenskyabf28392017-07-21 13:54:11 +0000200
Matt Arsenaulte8c03a22019-03-08 20:58:11 +0000201 SDValue getHi16Elt(SDValue In) const;
Matt Arsenaulte1cd4822017-11-13 00:22:09 +0000202
Justin Bogner95927c02016-05-12 21:03:32 +0000203 void SelectADD_SUB_I64(SDNode *N);
Stanislav Mekhanoshin8f3da702019-04-26 16:37:51 +0000204 void SelectAddcSubb(SDNode *N);
Matt Arsenaultee3f0ac2017-01-30 18:11:38 +0000205 void SelectUADDO_USUBO(SDNode *N);
Justin Bogner95927c02016-05-12 21:03:32 +0000206 void SelectDIV_SCALE(SDNode *N);
Stanislav Mekhanoshin8f3da702019-04-26 16:37:51 +0000207 void SelectDIV_FMAS(SDNode *N);
Matt Arsenault4f6318f2017-11-06 17:04:37 +0000208 void SelectMAD_64_32(SDNode *N);
Tom Stellard8485fa02016-12-07 02:42:15 +0000209 void SelectFMA_W_CHAIN(SDNode *N);
210 void SelectFMUL_W_CHAIN(SDNode *N);
Matt Arsenault9fa3f932014-06-23 18:00:34 +0000211
Benjamin Kramerbdc49562016-06-12 15:39:02 +0000212 SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
Marek Olsak9b728682015-03-24 13:40:27 +0000213 uint32_t Offset, uint32_t Width);
Justin Bogner95927c02016-05-12 21:03:32 +0000214 void SelectS_BFEFromShifts(SDNode *N);
215 void SelectS_BFE(SDNode *N);
Matt Arsenault7b1dc2c2016-09-17 02:02:19 +0000216 bool isCBranchSCC(const SDNode *N) const;
Justin Bogner95927c02016-05-12 21:03:32 +0000217 void SelectBRCOND(SDNode *N);
Matt Arsenault0084adc2018-04-30 19:08:16 +0000218 void SelectFMAD_FMA(SDNode *N);
Matt Arsenault88701812016-06-09 23:42:48 +0000219 void SelectATOMIC_CMP_SWAP(SDNode *N);
Matt Arsenaultcdd191d2019-01-28 20:14:49 +0000220 void SelectINTRINSIC_W_CHAIN(SDNode *N);
Marek Olsak9b728682015-03-24 13:40:27 +0000221
Tom Stellard20287692017-08-08 04:57:55 +0000222protected:
Tom Stellard75aadc22012-12-11 21:25:42 +0000223 // Include the pieces autogenerated from the target description.
224#include "AMDGPUGenDAGISel.inc"
225};
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000226
Tom Stellard20287692017-08-08 04:57:55 +0000227class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
Tom Stellardc5a154d2018-06-28 23:47:12 +0000228 const R600Subtarget *Subtarget;
Tom Stellardc5a154d2018-06-28 23:47:12 +0000229
230 bool isConstantLoad(const MemSDNode *N, int cbID) const;
231 bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
232 bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
233 SDValue& Offset);
Tom Stellard20287692017-08-08 04:57:55 +0000234public:
235 explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
Matt Arsenault0da63502018-08-31 05:49:54 +0000236 AMDGPUDAGToDAGISel(TM, OptLevel) {}
Tom Stellard20287692017-08-08 04:57:55 +0000237
238 void Select(SDNode *N) override;
239
240 bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
241 SDValue &Offset) override;
242 bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
243 SDValue &Offset) override;
Tom Stellardc5a154d2018-06-28 23:47:12 +0000244
245 bool runOnMachineFunction(MachineFunction &MF) override;
Matt Arsenaulte8c03a22019-03-08 20:58:11 +0000246
247 void PreprocessISelDAG() override {}
248
Tom Stellardc5a154d2018-06-28 23:47:12 +0000249protected:
250 // Include the pieces autogenerated from the target description.
251#include "R600GenDAGISel.inc"
Tom Stellard20287692017-08-08 04:57:55 +0000252};
253
Matt Arsenaulte8c03a22019-03-08 20:58:11 +0000254static SDValue stripBitcast(SDValue Val) {
255 return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
256}
257
258// Figure out if this is really an extract of the high 16-bits of a dword.
259static bool isExtractHiElt(SDValue In, SDValue &Out) {
260 In = stripBitcast(In);
261 if (In.getOpcode() != ISD::TRUNCATE)
262 return false;
263
264 SDValue Srl = In.getOperand(0);
265 if (Srl.getOpcode() == ISD::SRL) {
266 if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
267 if (ShiftAmt->getZExtValue() == 16) {
268 Out = stripBitcast(Srl.getOperand(0));
269 return true;
270 }
271 }
272 }
273
274 return false;
275}
276
277// Look through operations that obscure just looking at the low 16-bits of the
278// same register.
279static SDValue stripExtractLoElt(SDValue In) {
280 if (In.getOpcode() == ISD::TRUNCATE) {
281 SDValue Src = In.getOperand(0);
282 if (Src.getValueType().getSizeInBits() == 32)
283 return stripBitcast(Src);
284 }
285
286 return In;
287}
288
Tom Stellard75aadc22012-12-11 21:25:42 +0000289} // end anonymous namespace
290
Fangrui Song3d76d362018-10-03 03:38:22 +0000291INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",
Matt Arsenault7016f132017-08-03 22:30:46 +0000292 "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
293INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
Stanislav Mekhanoshin1c538422018-05-25 17:25:12 +0000294INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
Nicolai Haehnle35617ed2018-08-30 14:21:36 +0000295INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
Fangrui Song3d76d362018-10-03 03:38:22 +0000296INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel",
Matt Arsenault7016f132017-08-03 22:30:46 +0000297 "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
298
Adrian Prantl5f8f34e42018-05-01 15:54:18 +0000299/// This pass converts a legalized DAG into a AMDGPU-specific
Tom Stellard75aadc22012-12-11 21:25:42 +0000300// DAG, ready for instruction scheduling.
Matt Arsenault7016f132017-08-03 22:30:46 +0000301FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
Konstantin Zhuravlyov60a83732016-10-03 18:47:26 +0000302 CodeGenOpt::Level OptLevel) {
303 return new AMDGPUDAGToDAGISel(TM, OptLevel);
Tom Stellard75aadc22012-12-11 21:25:42 +0000304}
305
Adrian Prantl5f8f34e42018-05-01 15:54:18 +0000306/// This pass converts a legalized DAG into a R600-specific
Tom Stellard20287692017-08-08 04:57:55 +0000307// DAG, ready for instruction scheduling.
308FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
309 CodeGenOpt::Level OptLevel) {
310 return new R600DAGToDAGISel(TM, OptLevel);
311}
312
Eric Christopher7792e322015-01-30 23:24:40 +0000313bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
Tom Stellard5bfbae52018-07-11 20:59:01 +0000314 Subtarget = &MF.getSubtarget<GCNSubtarget>();
Eric Christopher7792e322015-01-30 23:24:40 +0000315 return SelectionDAGISel::runOnMachineFunction(MF);
Tom Stellard75aadc22012-12-11 21:25:42 +0000316}
317
Matt Arsenaulte8c03a22019-03-08 20:58:11 +0000318bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const {
319 assert(Subtarget->d16PreservesUnusedBits());
320 MVT VT = N->getValueType(0).getSimpleVT();
321 if (VT != MVT::v2i16 && VT != MVT::v2f16)
322 return false;
323
324 SDValue Lo = N->getOperand(0);
325 SDValue Hi = N->getOperand(1);
326
327 LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(Hi));
328
329 // build_vector lo, (load ptr) -> load_d16_hi ptr, lo
330 // build_vector lo, (zextload ptr from i8) -> load_d16_hi_u8 ptr, lo
331 // build_vector lo, (sextload ptr from i8) -> load_d16_hi_i8 ptr, lo
332
333 // Need to check for possible indirect dependencies on the other half of the
334 // vector to avoid introducing a cycle.
335 if (LdHi && Hi.hasOneUse() && !LdHi->isPredecessorOf(Lo.getNode())) {
336 SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
337
338 SDValue TiedIn = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Lo);
339 SDValue Ops[] = {
340 LdHi->getChain(), LdHi->getBasePtr(), TiedIn
341 };
342
343 unsigned LoadOp = AMDGPUISD::LOAD_D16_HI;
344 if (LdHi->getMemoryVT() == MVT::i8) {
345 LoadOp = LdHi->getExtensionType() == ISD::SEXTLOAD ?
346 AMDGPUISD::LOAD_D16_HI_I8 : AMDGPUISD::LOAD_D16_HI_U8;
347 } else {
348 assert(LdHi->getMemoryVT() == MVT::i16);
349 }
350
351 SDValue NewLoadHi =
352 CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdHi), VTList,
353 Ops, LdHi->getMemoryVT(),
354 LdHi->getMemOperand());
355
356 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadHi);
357 CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdHi, 1), NewLoadHi.getValue(1));
358 return true;
359 }
360
361 // build_vector (load ptr), hi -> load_d16_lo ptr, hi
362 // build_vector (zextload ptr from i8), hi -> load_d16_lo_u8 ptr, hi
363 // build_vector (sextload ptr from i8), hi -> load_d16_lo_i8 ptr, hi
364 LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(Lo));
365 if (LdLo && Lo.hasOneUse()) {
366 SDValue TiedIn = getHi16Elt(Hi);
367 if (!TiedIn || LdLo->isPredecessorOf(TiedIn.getNode()))
368 return false;
369
370 SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
371 unsigned LoadOp = AMDGPUISD::LOAD_D16_LO;
372 if (LdLo->getMemoryVT() == MVT::i8) {
373 LoadOp = LdLo->getExtensionType() == ISD::SEXTLOAD ?
374 AMDGPUISD::LOAD_D16_LO_I8 : AMDGPUISD::LOAD_D16_LO_U8;
375 } else {
376 assert(LdLo->getMemoryVT() == MVT::i16);
377 }
378
379 TiedIn = CurDAG->getNode(ISD::BITCAST, SDLoc(N), VT, TiedIn);
380
381 SDValue Ops[] = {
382 LdLo->getChain(), LdLo->getBasePtr(), TiedIn
383 };
384
385 SDValue NewLoadLo =
386 CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdLo), VTList,
387 Ops, LdLo->getMemoryVT(),
388 LdLo->getMemOperand());
389
390 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadLo);
391 CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdLo, 1), NewLoadLo.getValue(1));
392 return true;
393 }
394
395 return false;
396}
397
398void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
399 if (!Subtarget->d16PreservesUnusedBits())
400 return;
401
402 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
403
404 bool MadeChange = false;
405 while (Position != CurDAG->allnodes_begin()) {
406 SDNode *N = &*--Position;
407 if (N->use_empty())
408 continue;
409
410 switch (N->getOpcode()) {
411 case ISD::BUILD_VECTOR:
412 MadeChange |= matchLoadD16FromBuildVector(N);
413 break;
414 default:
415 break;
416 }
417 }
418
419 if (MadeChange) {
420 CurDAG->RemoveDeadNodes();
421 LLVM_DEBUG(dbgs() << "After PreProcess:\n";
422 CurDAG->dump(););
423 }
424}
425
Matt Arsenaultf84e5d92017-01-31 03:07:46 +0000426bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
427 if (TM.Options.NoNaNsFPMath)
428 return true;
429
430 // TODO: Move into isKnownNeverNaN
Amara Emersond28f0cd42017-05-01 15:17:51 +0000431 if (N->getFlags().isDefined())
432 return N->getFlags().hasNoNaNs();
Matt Arsenaultf84e5d92017-01-31 03:07:46 +0000433
434 return CurDAG->isKnownNeverNaN(N);
435}
436
Matt Arsenaultfe267752016-07-28 00:32:02 +0000437bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
Tom Stellardc5a154d2018-06-28 23:47:12 +0000438 const SIInstrInfo *TII = Subtarget->getInstrInfo();
Matt Arsenaultfe267752016-07-28 00:32:02 +0000439
440 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
441 return TII->isInlineConstant(C->getAPIntValue());
442
443 if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
444 return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
445
446 return false;
Tom Stellard7ed0b522014-04-03 20:19:27 +0000447}
448
Adrian Prantl5f8f34e42018-05-01 15:54:18 +0000449/// Determine the register class for \p OpNo
Tom Stellarddf94dc32013-08-14 23:24:24 +0000450/// \returns The register class of the virtual register that will be used for
451/// the given operand number \OpNo or NULL if the register class cannot be
452/// determined.
453const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
454 unsigned OpNo) const {
Matt Arsenaultc507cdb2016-11-01 23:22:17 +0000455 if (!N->isMachineOpcode()) {
456 if (N->getOpcode() == ISD::CopyToReg) {
457 unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
458 if (TargetRegisterInfo::isVirtualRegister(Reg)) {
459 MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
460 return MRI.getRegClass(Reg);
461 }
462
463 const SIRegisterInfo *TRI
Tom Stellard5bfbae52018-07-11 20:59:01 +0000464 = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
Matt Arsenaultc507cdb2016-11-01 23:22:17 +0000465 return TRI->getPhysRegClass(Reg);
466 }
467
Matt Arsenault209a7b92014-04-18 07:40:20 +0000468 return nullptr;
Matt Arsenaultc507cdb2016-11-01 23:22:17 +0000469 }
Matt Arsenault209a7b92014-04-18 07:40:20 +0000470
Tom Stellarddf94dc32013-08-14 23:24:24 +0000471 switch (N->getMachineOpcode()) {
472 default: {
Eric Christopherd9134482014-08-04 21:25:23 +0000473 const MCInstrDesc &Desc =
Eric Christopher7792e322015-01-30 23:24:40 +0000474 Subtarget->getInstrInfo()->get(N->getMachineOpcode());
Alexey Samsonov3186eb32013-08-15 07:11:34 +0000475 unsigned OpIdx = Desc.getNumDefs() + OpNo;
476 if (OpIdx >= Desc.getNumOperands())
Matt Arsenault209a7b92014-04-18 07:40:20 +0000477 return nullptr;
Alexey Samsonov3186eb32013-08-15 07:11:34 +0000478 int RegClass = Desc.OpInfo[OpIdx].RegClass;
Matt Arsenault209a7b92014-04-18 07:40:20 +0000479 if (RegClass == -1)
480 return nullptr;
481
Eric Christopher7792e322015-01-30 23:24:40 +0000482 return Subtarget->getRegisterInfo()->getRegClass(RegClass);
Tom Stellarddf94dc32013-08-14 23:24:24 +0000483 }
484 case AMDGPU::REG_SEQUENCE: {
Matt Arsenault209a7b92014-04-18 07:40:20 +0000485 unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
Eric Christopherd9134482014-08-04 21:25:23 +0000486 const TargetRegisterClass *SuperRC =
Eric Christopher7792e322015-01-30 23:24:40 +0000487 Subtarget->getRegisterInfo()->getRegClass(RCID);
Matt Arsenault209a7b92014-04-18 07:40:20 +0000488
489 SDValue SubRegOp = N->getOperand(OpNo + 1);
490 unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
Eric Christopher7792e322015-01-30 23:24:40 +0000491 return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
492 SubRegIdx);
Tom Stellarddf94dc32013-08-14 23:24:24 +0000493 }
494 }
495}
496
Matt Arsenaultcdd191d2019-01-28 20:14:49 +0000497SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const {
Tom Stellard381a94a2015-05-12 15:00:49 +0000498 const SITargetLowering& Lowering =
Matt Arsenaultcdd191d2019-01-28 20:14:49 +0000499 *static_cast<const SITargetLowering*>(getTargetLowering());
Tom Stellard381a94a2015-05-12 15:00:49 +0000500
501 // Write max value to m0 before each load operation
502
503 SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
Matt Arsenaultcdd191d2019-01-28 20:14:49 +0000504 Val);
Tom Stellard381a94a2015-05-12 15:00:49 +0000505
506 SDValue Glue = M0.getValue(1);
507
508 SmallVector <SDValue, 8> Ops;
Matt Arsenaultcdd191d2019-01-28 20:14:49 +0000509 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
510 Ops.push_back(N->getOperand(i));
511
Tom Stellard381a94a2015-05-12 15:00:49 +0000512 Ops.push_back(Glue);
Matt Arsenaulte6667de2017-12-04 22:18:22 +0000513 return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
Tom Stellard381a94a2015-05-12 15:00:49 +0000514}
515
Matt Arsenaultcdd191d2019-01-28 20:14:49 +0000516SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
517 if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS ||
518 !Subtarget->ldsRequiresM0Init())
519 return N;
520 return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
521}
522
Tim Renouff1c7b922018-08-02 22:53:57 +0000523MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
524 EVT VT) const {
525 SDNode *Lo = CurDAG->getMachineNode(
526 AMDGPU::S_MOV_B32, DL, MVT::i32,
527 CurDAG->getConstant(Imm & 0xFFFFFFFF, DL, MVT::i32));
528 SDNode *Hi =
529 CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
530 CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
531 const SDValue Ops[] = {
532 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
533 SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
534 SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};
535
536 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
537}
538
Matt Arsenault61cb6fa2015-11-11 00:01:36 +0000539static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
Matt Arsenaultf1aebbf2015-11-02 23:30:48 +0000540 switch (NumVectorElts) {
541 case 1:
Marek Olsak79c05872016-11-25 17:37:09 +0000542 return AMDGPU::SReg_32_XM0RegClassID;
Matt Arsenaultf1aebbf2015-11-02 23:30:48 +0000543 case 2:
544 return AMDGPU::SReg_64RegClassID;
Tim Renouf361b5b22019-03-21 12:01:21 +0000545 case 3:
546 return AMDGPU::SGPR_96RegClassID;
Matt Arsenaultf1aebbf2015-11-02 23:30:48 +0000547 case 4:
548 return AMDGPU::SReg_128RegClassID;
Tim Renouf033f99a2019-03-22 10:11:21 +0000549 case 5:
550 return AMDGPU::SGPR_160RegClassID;
Matt Arsenaultf1aebbf2015-11-02 23:30:48 +0000551 case 8:
552 return AMDGPU::SReg_256RegClassID;
553 case 16:
554 return AMDGPU::SReg_512RegClassID;
555 }
556
557 llvm_unreachable("invalid vector size");
558}
559
Matt Arsenaulteb522e62017-02-27 22:15:25 +0000560static bool getConstantValue(SDValue N, uint32_t &Out) {
561 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
562 Out = C->getAPIntValue().getZExtValue();
563 return true;
564 }
565
566 if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
567 Out = C->getValueAPF().bitcastToAPInt().getZExtValue();
568 return true;
569 }
570
571 return false;
572}
573
Tom Stellard20287692017-08-08 04:57:55 +0000574void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
Tom Stellard20287692017-08-08 04:57:55 +0000575 EVT VT = N->getValueType(0);
576 unsigned NumVectorElts = VT.getVectorNumElements();
577 EVT EltVT = VT.getVectorElementType();
Tom Stellard20287692017-08-08 04:57:55 +0000578 SDLoc DL(N);
579 SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
580
581 if (NumVectorElts == 1) {
582 CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
583 RegClass);
584 return;
585 }
586
587 assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
588 "supported yet");
589 // 16 = Max Num Vector Elements
590 // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
591 // 1 = Vector Register Class
592 SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
593
594 RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
595 bool IsRegSeq = true;
596 unsigned NOps = N->getNumOperands();
597 for (unsigned i = 0; i < NOps; i++) {
598 // XXX: Why is this here?
599 if (isa<RegisterSDNode>(N->getOperand(i))) {
600 IsRegSeq = false;
601 break;
602 }
Simon Pilgrimede0e402018-05-19 12:46:02 +0000603 unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
Tom Stellard20287692017-08-08 04:57:55 +0000604 RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
Simon Pilgrimede0e402018-05-19 12:46:02 +0000605 RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
Tom Stellard20287692017-08-08 04:57:55 +0000606 }
607 if (NOps != NumVectorElts) {
608 // Fill in the missing undef elements if this was a scalar_to_vector.
Tom Stellard03aa3ae2017-08-08 05:52:00 +0000609 assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
Tom Stellard20287692017-08-08 04:57:55 +0000610 MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
611 DL, EltVT);
612 for (unsigned i = NOps; i < NumVectorElts; ++i) {
Simon Pilgrimede0e402018-05-19 12:46:02 +0000613 unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
Tom Stellard20287692017-08-08 04:57:55 +0000614 RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
615 RegSeqArgs[1 + (2 * i) + 1] =
Simon Pilgrimede0e402018-05-19 12:46:02 +0000616 CurDAG->getTargetConstant(Sub, DL, MVT::i32);
Tom Stellard20287692017-08-08 04:57:55 +0000617 }
618 }
619
620 if (!IsRegSeq)
621 SelectCode(N);
622 CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
623}
624
Justin Bogner95927c02016-05-12 21:03:32 +0000625void AMDGPUDAGToDAGISel::Select(SDNode *N) {
Tom Stellard75aadc22012-12-11 21:25:42 +0000626 unsigned int Opc = N->getOpcode();
627 if (N->isMachineOpcode()) {
Tim Northover31d093c2013-09-22 08:21:56 +0000628 N->setNodeId(-1);
Justin Bogner95927c02016-05-12 21:03:32 +0000629 return; // Already selected.
Tom Stellard75aadc22012-12-11 21:25:42 +0000630 }
Matt Arsenault78b86702014-04-18 05:19:26 +0000631
Matt Arsenaulta9dbdca2016-04-12 14:05:04 +0000632 if (isa<AtomicSDNode>(N) ||
Daniil Fukalovd5fca552018-01-17 14:05:05 +0000633 (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
Matt Arsenaulta5840c32019-01-22 18:36:06 +0000634 Opc == ISD::ATOMIC_LOAD_FADD ||
Daniil Fukalovd5fca552018-01-17 14:05:05 +0000635 Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
636 Opc == AMDGPUISD::ATOMIC_LOAD_FMAX))
Matt Arsenaultcdd191d2019-01-28 20:14:49 +0000637 N = glueCopyToM0LDSInit(N);
Tom Stellard381a94a2015-05-12 15:00:49 +0000638
Tom Stellard75aadc22012-12-11 21:25:42 +0000639 switch (Opc) {
Matt Arsenault84445dd2017-11-30 22:51:26 +0000640 default:
641 break;
Tom Stellard1f15bff2014-02-25 21:36:18 +0000642 // We are selecting i64 ADD here instead of custom lower it during
643 // DAG legalization, so we can fold some i64 ADDs used for address
644 // calculation into the LOAD and STORE instructions.
Nicolai Haehnle67624af2016-10-14 10:30:00 +0000645 case ISD::ADDC:
646 case ISD::ADDE:
Nicolai Haehnle67624af2016-10-14 10:30:00 +0000647 case ISD::SUBC:
648 case ISD::SUBE: {
Tom Stellard20287692017-08-08 04:57:55 +0000649 if (N->getValueType(0) != MVT::i64)
Tom Stellard1f15bff2014-02-25 21:36:18 +0000650 break;
651
Justin Bogner95927c02016-05-12 21:03:32 +0000652 SelectADD_SUB_I64(N);
653 return;
Tom Stellard1f15bff2014-02-25 21:36:18 +0000654 }
Stanislav Mekhanoshin8f3da702019-04-26 16:37:51 +0000655 case ISD::ADDCARRY:
656 case ISD::SUBCARRY:
657 if (N->getValueType(0) != MVT::i32)
658 break;
659
660 SelectAddcSubb(N);
661 return;
Matt Arsenaultee3f0ac2017-01-30 18:11:38 +0000662 case ISD::UADDO:
663 case ISD::USUBO: {
664 SelectUADDO_USUBO(N);
665 return;
666 }
Tom Stellard8485fa02016-12-07 02:42:15 +0000667 case AMDGPUISD::FMUL_W_CHAIN: {
668 SelectFMUL_W_CHAIN(N);
669 return;
670 }
671 case AMDGPUISD::FMA_W_CHAIN: {
672 SelectFMA_W_CHAIN(N);
673 return;
674 }
675
Matt Arsenault064c2062014-06-11 17:40:32 +0000676 case ISD::SCALAR_TO_VECTOR:
Vincent Lejeune3b6f20e2013-03-05 15:04:49 +0000677 case ISD::BUILD_VECTOR: {
Tom Stellard8e5da412013-08-14 23:24:32 +0000678 EVT VT = N->getValueType(0);
679 unsigned NumVectorElts = VT.getVectorNumElements();
Matt Arsenault5a4ec812018-06-20 19:45:48 +0000680 if (VT.getScalarSizeInBits() == 16) {
681 if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
Matt Arsenaulteb522e62017-02-27 22:15:25 +0000682 uint32_t LHSVal, RHSVal;
683 if (getConstantValue(N->getOperand(0), LHSVal) &&
684 getConstantValue(N->getOperand(1), RHSVal)) {
685 uint32_t K = LHSVal | (RHSVal << 16);
686 CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, VT,
687 CurDAG->getTargetConstant(K, SDLoc(N), MVT::i32));
688 return;
689 }
690 }
691
692 break;
693 }
694
Tom Stellard03aa3ae2017-08-08 05:52:00 +0000695 assert(VT.getVectorElementType().bitsEq(MVT::i32));
Tom Stellard20287692017-08-08 04:57:55 +0000696 unsigned RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
697 SelectBuildVector(N, RegClassID);
Justin Bogner95927c02016-05-12 21:03:32 +0000698 return;
Vincent Lejeune3b6f20e2013-03-05 15:04:49 +0000699 }
Tom Stellard754f80f2013-04-05 23:31:51 +0000700 case ISD::BUILD_PAIR: {
701 SDValue RC, SubReg0, SubReg1;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000702 SDLoc DL(N);
Tom Stellard754f80f2013-04-05 23:31:51 +0000703 if (N->getValueType(0) == MVT::i128) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000704 RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
705 SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
706 SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
Tom Stellard754f80f2013-04-05 23:31:51 +0000707 } else if (N->getValueType(0) == MVT::i64) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000708 RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
709 SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
710 SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
Tom Stellard754f80f2013-04-05 23:31:51 +0000711 } else {
712 llvm_unreachable("Unhandled value type for BUILD_PAIR");
713 }
714 const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
715 N->getOperand(1), SubReg1 };
Justin Bogner95927c02016-05-12 21:03:32 +0000716 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
717 N->getValueType(0), Ops));
718 return;
Tom Stellard754f80f2013-04-05 23:31:51 +0000719 }
Tom Stellard7ed0b522014-04-03 20:19:27 +0000720
721 case ISD::Constant:
722 case ISD::ConstantFP: {
Tom Stellard20287692017-08-08 04:57:55 +0000723 if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
Tom Stellard7ed0b522014-04-03 20:19:27 +0000724 break;
725
726 uint64_t Imm;
727 if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
728 Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
729 else {
Tom Stellard3cbe0142014-04-07 19:31:13 +0000730 ConstantSDNode *C = cast<ConstantSDNode>(N);
Tom Stellard7ed0b522014-04-03 20:19:27 +0000731 Imm = C->getZExtValue();
732 }
733
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000734 SDLoc DL(N);
Tim Renouff1c7b922018-08-02 22:53:57 +0000735 ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));
Justin Bogner95927c02016-05-12 21:03:32 +0000736 return;
Tom Stellard7ed0b522014-04-03 20:19:27 +0000737 }
Matt Arsenault4bf43d42015-09-25 17:27:08 +0000738 case ISD::LOAD:
Matt Arsenault3f8e7a32018-06-22 08:39:52 +0000739 case ISD::STORE:
740 case ISD::ATOMIC_LOAD:
741 case ISD::ATOMIC_STORE: {
Matt Arsenaultcdd191d2019-01-28 20:14:49 +0000742 N = glueCopyToM0LDSInit(N);
Tom Stellard096b8c12015-02-04 20:49:49 +0000743 break;
744 }
Matt Arsenault78b86702014-04-18 05:19:26 +0000745
746 case AMDGPUISD::BFE_I32:
747 case AMDGPUISD::BFE_U32: {
Matt Arsenault78b86702014-04-18 05:19:26 +0000748 // There is a scalar version available, but unlike the vector version which
749 // has a separate operand for the offset and width, the scalar version packs
750 // the width and offset into a single operand. Try to move to the scalar
751 // version if the offsets are constant, so that we can try to keep extended
752 // loads of kernel arguments in SGPRs.
753
754 // TODO: Technically we could try to pattern match scalar bitshifts of
755 // dynamic values, but it's probably not useful.
756 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
757 if (!Offset)
758 break;
759
760 ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
761 if (!Width)
762 break;
763
764 bool Signed = Opc == AMDGPUISD::BFE_I32;
765
Matt Arsenault78b86702014-04-18 05:19:26 +0000766 uint32_t OffsetVal = Offset->getZExtValue();
767 uint32_t WidthVal = Width->getZExtValue();
768
Justin Bogner95927c02016-05-12 21:03:32 +0000769 ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
770 SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
771 return;
Matt Arsenault78b86702014-04-18 05:19:26 +0000772 }
Matt Arsenaultf2b0aeb2014-06-23 18:28:28 +0000773 case AMDGPUISD::DIV_SCALE: {
Justin Bogner95927c02016-05-12 21:03:32 +0000774 SelectDIV_SCALE(N);
775 return;
Matt Arsenaultf2b0aeb2014-06-23 18:28:28 +0000776 }
Stanislav Mekhanoshin8f3da702019-04-26 16:37:51 +0000777 case AMDGPUISD::DIV_FMAS: {
778 SelectDIV_FMAS(N);
779 return;
780 }
Matt Arsenault4f6318f2017-11-06 17:04:37 +0000781 case AMDGPUISD::MAD_I64_I32:
782 case AMDGPUISD::MAD_U64_U32: {
783 SelectMAD_64_32(N);
784 return;
785 }
Tom Stellard3457a842014-10-09 19:06:00 +0000786 case ISD::CopyToReg: {
787 const SITargetLowering& Lowering =
788 *static_cast<const SITargetLowering*>(getTargetLowering());
Matt Arsenault0d0d6c22017-04-12 21:58:23 +0000789 N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
Tom Stellard3457a842014-10-09 19:06:00 +0000790 break;
791 }
Marek Olsak9b728682015-03-24 13:40:27 +0000792 case ISD::AND:
793 case ISD::SRL:
794 case ISD::SRA:
Matt Arsenault7e8de012016-04-22 22:59:16 +0000795 case ISD::SIGN_EXTEND_INREG:
Tom Stellard20287692017-08-08 04:57:55 +0000796 if (N->getValueType(0) != MVT::i32)
Marek Olsak9b728682015-03-24 13:40:27 +0000797 break;
798
Justin Bogner95927c02016-05-12 21:03:32 +0000799 SelectS_BFE(N);
800 return;
Tom Stellardbc4497b2016-02-12 23:45:29 +0000801 case ISD::BRCOND:
Justin Bogner95927c02016-05-12 21:03:32 +0000802 SelectBRCOND(N);
803 return;
Matt Arsenaultd7e23032017-09-07 18:05:07 +0000804 case ISD::FMAD:
Matt Arsenault0084adc2018-04-30 19:08:16 +0000805 case ISD::FMA:
806 SelectFMAD_FMA(N);
Matt Arsenaultd7e23032017-09-07 18:05:07 +0000807 return;
Matt Arsenault88701812016-06-09 23:42:48 +0000808 case AMDGPUISD::ATOMIC_CMP_SWAP:
809 SelectATOMIC_CMP_SWAP(N);
810 return;
Matt Arsenault709374d2018-08-01 20:13:58 +0000811 case AMDGPUISD::CVT_PKRTZ_F16_F32:
812 case AMDGPUISD::CVT_PKNORM_I16_F32:
813 case AMDGPUISD::CVT_PKNORM_U16_F32:
814 case AMDGPUISD::CVT_PK_U16_U32:
815 case AMDGPUISD::CVT_PK_I16_I32: {
816 // Hack around using a legal type if f16 is illegal.
817 if (N->getValueType(0) == MVT::i32) {
818 MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
819 N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT),
820 { N->getOperand(0), N->getOperand(1) });
821 SelectCode(N);
822 return;
823 }
Matt Arsenaultcdd191d2019-01-28 20:14:49 +0000824
825 break;
826 }
827 case ISD::INTRINSIC_W_CHAIN: {
828 SelectINTRINSIC_W_CHAIN(N);
829 return;
Matt Arsenault709374d2018-08-01 20:13:58 +0000830 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000831 }
Tom Stellard3457a842014-10-09 19:06:00 +0000832
Justin Bogner95927c02016-05-12 21:03:32 +0000833 SelectCode(N);
Tom Stellard365366f2013-01-23 02:09:06 +0000834}
835
Tom Stellardbc4497b2016-02-12 23:45:29 +0000836bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
837 const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
Nicolai Haehnle05b127d2016-04-14 17:42:35 +0000838 const Instruction *Term = BB->getTerminator();
839 return Term->getMetadata("amdgpu.uniform") ||
840 Term->getMetadata("structurizecfg.uniform");
Tom Stellardbc4497b2016-02-12 23:45:29 +0000841}
842
Mehdi Amini117296c2016-10-01 02:56:57 +0000843StringRef AMDGPUDAGToDAGISel::getPassName() const {
Tom Stellard75aadc22012-12-11 21:25:42 +0000844 return "AMDGPU DAG->DAG Pattern Instruction Selection";
845}
846
//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//
Tom Stellard75aadc22012-12-11 21:25:42 +0000850
Tom Stellard75aadc22012-12-11 21:25:42 +0000851bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
Tom Stellard20287692017-08-08 04:57:55 +0000852 SDValue &Offset) {
853 return false;
Tom Stellard75aadc22012-12-11 21:25:42 +0000854}
855
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000856bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
857 SDValue &Offset) {
858 ConstantSDNode *C;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000859 SDLoc DL(Addr);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000860
861 if ((C = dyn_cast<ConstantSDNode>(Addr))) {
Tom Stellardc5a154d2018-06-28 23:47:12 +0000862 Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000863 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
Jan Vesely06200bd2017-01-06 21:00:46 +0000864 } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
865 (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
Tom Stellardc5a154d2018-06-28 23:47:12 +0000866 Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
Jan Vesely06200bd2017-01-06 21:00:46 +0000867 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000868 } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
869 (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
870 Base = Addr.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000871 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000872 } else {
873 Base = Addr;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000874 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000875 }
876
877 return true;
878}
Christian Konigd910b7d2013-02-26 17:52:16 +0000879
Matt Arsenault84445dd2017-11-30 22:51:26 +0000880// FIXME: Should only handle addcarry/subcarry
Justin Bogner95927c02016-05-12 21:03:32 +0000881void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
Matt Arsenault9fa3f932014-06-23 18:00:34 +0000882 SDLoc DL(N);
883 SDValue LHS = N->getOperand(0);
884 SDValue RHS = N->getOperand(1);
885
Nicolai Haehnle67624af2016-10-14 10:30:00 +0000886 unsigned Opcode = N->getOpcode();
887 bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
888 bool ProduceCarry =
889 ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
Matt Arsenault84445dd2017-11-30 22:51:26 +0000890 bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;
Matt Arsenaultb8b51532014-06-23 18:00:38 +0000891
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000892 SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
893 SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
Matt Arsenault9fa3f932014-06-23 18:00:34 +0000894
895 SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
896 DL, MVT::i32, LHS, Sub0);
897 SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
898 DL, MVT::i32, LHS, Sub1);
899
900 SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
901 DL, MVT::i32, RHS, Sub0);
902 SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
903 DL, MVT::i32, RHS, Sub1);
904
905 SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
Matt Arsenault9fa3f932014-06-23 18:00:34 +0000906
Tom Stellard80942a12014-09-05 14:07:59 +0000907 unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
Matt Arsenaultb8b51532014-06-23 18:00:38 +0000908 unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
909
Nicolai Haehnle67624af2016-10-14 10:30:00 +0000910 SDNode *AddLo;
911 if (!ConsumeCarry) {
912 SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
913 AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
914 } else {
915 SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
916 AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
917 }
918 SDValue AddHiArgs[] = {
919 SDValue(Hi0, 0),
920 SDValue(Hi1, 0),
921 SDValue(AddLo, 1)
922 };
923 SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);
Matt Arsenault9fa3f932014-06-23 18:00:34 +0000924
Nicolai Haehnle67624af2016-10-14 10:30:00 +0000925 SDValue RegSequenceArgs[] = {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000926 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
Matt Arsenault9fa3f932014-06-23 18:00:34 +0000927 SDValue(AddLo,0),
928 Sub0,
929 SDValue(AddHi,0),
930 Sub1,
931 };
Nicolai Haehnle67624af2016-10-14 10:30:00 +0000932 SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
933 MVT::i64, RegSequenceArgs);
934
935 if (ProduceCarry) {
936 // Replace the carry-use
Nirav Dave3264c1b2018-03-19 20:19:46 +0000937 ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
Nicolai Haehnle67624af2016-10-14 10:30:00 +0000938 }
939
940 // Replace the remaining uses.
Nirav Dave3264c1b2018-03-19 20:19:46 +0000941 ReplaceNode(N, RegSequence);
Matt Arsenault9fa3f932014-06-23 18:00:34 +0000942}
943
Stanislav Mekhanoshin8f3da702019-04-26 16:37:51 +0000944void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {
945 SDLoc DL(N);
946 SDValue LHS = N->getOperand(0);
947 SDValue RHS = N->getOperand(1);
948 SDValue CI = N->getOperand(2);
949
950 unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::V_ADDC_U32_e64
951 : AMDGPU::V_SUBB_U32_e64;
952 CurDAG->SelectNodeTo(
953 N, Opc, N->getVTList(),
954 {LHS, RHS, CI, CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
955}
956
Matt Arsenaultee3f0ac2017-01-30 18:11:38 +0000957void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
958 // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
959 // carry out despite the _i32 name. These were renamed in VI to _U32.
960 // FIXME: We should probably rename the opcodes here.
961 unsigned Opc = N->getOpcode() == ISD::UADDO ?
962 AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
963
Michael Liaoeea51772019-03-20 20:18:56 +0000964 CurDAG->SelectNodeTo(
965 N, Opc, N->getVTList(),
966 {N->getOperand(0), N->getOperand(1),
967 CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
Matt Arsenaultee3f0ac2017-01-30 18:11:38 +0000968}
969
Tom Stellard8485fa02016-12-07 02:42:15 +0000970void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
971 SDLoc SL(N);
972 // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
973 SDValue Ops[10];
974
975 SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
976 SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
977 SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
978 Ops[8] = N->getOperand(0);
979 Ops[9] = N->getOperand(4);
980
981 CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
982}
983
984void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
985 SDLoc SL(N);
NAKAMURA Takumi6f43bd42017-10-18 13:31:28 +0000986 // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
Tom Stellard8485fa02016-12-07 02:42:15 +0000987 SDValue Ops[8];
988
989 SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
990 SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
991 Ops[6] = N->getOperand(0);
992 Ops[7] = N->getOperand(3);
993
994 CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
995}
996
Matt Arsenault044f1d12015-02-14 04:24:28 +0000997// We need to handle this here because tablegen doesn't support matching
998// instructions with multiple outputs.
Justin Bogner95927c02016-05-12 21:03:32 +0000999void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
Matt Arsenaultf2b0aeb2014-06-23 18:28:28 +00001000 SDLoc SL(N);
1001 EVT VT = N->getValueType(0);
1002
1003 assert(VT == MVT::f32 || VT == MVT::f64);
1004
1005 unsigned Opc
1006 = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
1007
Matt Arsenault3b99f122017-01-19 06:04:12 +00001008 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
1009 CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
Matt Arsenaultf2b0aeb2014-06-23 18:28:28 +00001010}
1011
Stanislav Mekhanoshin8f3da702019-04-26 16:37:51 +00001012void AMDGPUDAGToDAGISel::SelectDIV_FMAS(SDNode *N) {
1013 SDLoc SL(N);
1014 EVT VT = N->getValueType(0);
1015
1016 assert(VT == MVT::f32 || VT == MVT::f64);
1017
1018 unsigned Opc
1019 = (VT == MVT::f64) ? AMDGPU::V_DIV_FMAS_F64 : AMDGPU::V_DIV_FMAS_F32;
1020
1021 SDValue CarryIn = N->getOperand(3);
1022 // V_DIV_FMAS implicitly reads VCC.
1023 SDValue VCC = CurDAG->getCopyToReg(CurDAG->getEntryNode(), SL,
1024 AMDGPU::VCC, CarryIn, SDValue());
1025
1026 SDValue Ops[10];
1027
1028 SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
1029 SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
1030 SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
1031
1032 Ops[8] = VCC;
1033 Ops[9] = VCC.getValue(1);
1034
1035 CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
1036}
1037
Matt Arsenault4f6318f2017-11-06 17:04:37 +00001038// We need to handle this here because tablegen doesn't support matching
1039// instructions with multiple outputs.
1040void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
1041 SDLoc SL(N);
1042 bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
1043 unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32 : AMDGPU::V_MAD_U64_U32;
1044
1045 SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
1046 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
1047 Clamp };
1048 CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
1049}
1050
Matt Arsenaultcdd191d2019-01-28 20:14:49 +00001051bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset,
Tom Stellard85e8b6d2014-08-22 18:49:33 +00001052 unsigned OffsetBits) const {
Tom Stellard85e8b6d2014-08-22 18:49:33 +00001053 if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
1054 (OffsetBits == 8 && !isUInt<8>(Offset)))
1055 return false;
1056
Matt Arsenault706f9302015-07-06 16:01:58 +00001057 if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
1058 Subtarget->unsafeDSOffsetFoldingEnabled())
Tom Stellard85e8b6d2014-08-22 18:49:33 +00001059 return true;
1060
1061 // On Southern Islands instruction with a negative base value and an offset
1062 // don't seem to work.
1063 return CurDAG->SignBitIsZero(Base);
1064}
1065
1066bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
1067 SDValue &Offset) const {
Tom Stellard92b24f32016-04-29 14:34:26 +00001068 SDLoc DL(Addr);
Tom Stellard85e8b6d2014-08-22 18:49:33 +00001069 if (CurDAG->isBaseWithConstantOffset(Addr)) {
1070 SDValue N0 = Addr.getOperand(0);
1071 SDValue N1 = Addr.getOperand(1);
1072 ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1073 if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
1074 // (add n0, c0)
1075 Base = N0;
Tom Stellard92b24f32016-04-29 14:34:26 +00001076 Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
Tom Stellard85e8b6d2014-08-22 18:49:33 +00001077 return true;
1078 }
Matt Arsenault966a94f2015-09-08 19:34:22 +00001079 } else if (Addr.getOpcode() == ISD::SUB) {
1080 // sub C, x -> add (sub 0, x), C
1081 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
1082 int64_t ByteOffset = C->getSExtValue();
1083 if (isUInt<16>(ByteOffset)) {
Matt Arsenault966a94f2015-09-08 19:34:22 +00001084 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
Tom Stellard85e8b6d2014-08-22 18:49:33 +00001085
Matt Arsenault966a94f2015-09-08 19:34:22 +00001086 // XXX - This is kind of hacky. Create a dummy sub node so we can check
1087 // the known bits in isDSOffsetLegal. We need to emit the selected node
1088 // here, so this is thrown away.
1089 SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
1090 Zero, Addr.getOperand(1));
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001091
Matt Arsenault966a94f2015-09-08 19:34:22 +00001092 if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
Tim Renoufcfdfba92019-03-18 19:35:44 +00001093 SmallVector<SDValue, 3> Opnds;
1094 Opnds.push_back(Zero);
1095 Opnds.push_back(Addr.getOperand(1));
Matt Arsenault84445dd2017-11-30 22:51:26 +00001096
Tim Renoufcfdfba92019-03-18 19:35:44 +00001097 // FIXME: Select to VOP3 version for with-carry.
1098 unsigned SubOp = AMDGPU::V_SUB_I32_e32;
1099 if (Subtarget->hasAddNoCarry()) {
1100 SubOp = AMDGPU::V_SUB_U32_e64;
Michael Liaoeea51772019-03-20 20:18:56 +00001101 Opnds.push_back(
1102 CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
Tim Renoufcfdfba92019-03-18 19:35:44 +00001103 }
1104
1105 MachineSDNode *MachineSub =
1106 CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);
Matt Arsenault966a94f2015-09-08 19:34:22 +00001107
1108 Base = SDValue(MachineSub, 0);
Tom Stellard26a2ab72016-06-10 00:01:04 +00001109 Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
Matt Arsenault966a94f2015-09-08 19:34:22 +00001110 return true;
1111 }
1112 }
1113 }
1114 } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
1115 // If we have a constant address, prefer to put the constant into the
1116 // offset. This can save moves to load the constant address since multiple
1117 // operations can share the zero base address register, and enables merging
1118 // into read2 / write2 instructions.
1119
1120 SDLoc DL(Addr);
1121
Matt Arsenaulte775f5f2014-10-14 17:21:19 +00001122 if (isUInt<16>(CAddr->getZExtValue())) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001123 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
Tom Stellardc8d79202014-10-15 21:08:59 +00001124 MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001125 DL, MVT::i32, Zero);
Tom Stellardc8d79202014-10-15 21:08:59 +00001126 Base = SDValue(MovZero, 0);
Tom Stellard26a2ab72016-06-10 00:01:04 +00001127 Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
Matt Arsenaulte775f5f2014-10-14 17:21:19 +00001128 return true;
1129 }
1130 }
1131
Tom Stellard85e8b6d2014-08-22 18:49:33 +00001132 // default case
1133 Base = Addr;
Matt Arsenault966a94f2015-09-08 19:34:22 +00001134 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
Tom Stellard85e8b6d2014-08-22 18:49:33 +00001135 return true;
1136}
1137
Matt Arsenault966a94f2015-09-08 19:34:22 +00001138// TODO: If offset is too big, put low 16-bit into offset.
Tom Stellardf3fc5552014-08-22 18:49:35 +00001139bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
1140 SDValue &Offset0,
1141 SDValue &Offset1) const {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001142 SDLoc DL(Addr);
1143
Tom Stellardf3fc5552014-08-22 18:49:35 +00001144 if (CurDAG->isBaseWithConstantOffset(Addr)) {
1145 SDValue N0 = Addr.getOperand(0);
1146 SDValue N1 = Addr.getOperand(1);
1147 ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1148 unsigned DWordOffset0 = C1->getZExtValue() / 4;
1149 unsigned DWordOffset1 = DWordOffset0 + 1;
1150 // (add n0, c0)
1151 if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
1152 Base = N0;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001153 Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
1154 Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
Tom Stellardf3fc5552014-08-22 18:49:35 +00001155 return true;
1156 }
Matt Arsenault966a94f2015-09-08 19:34:22 +00001157 } else if (Addr.getOpcode() == ISD::SUB) {
1158 // sub C, x -> add (sub 0, x), C
1159 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
1160 unsigned DWordOffset0 = C->getZExtValue() / 4;
1161 unsigned DWordOffset1 = DWordOffset0 + 1;
Tom Stellardf3fc5552014-08-22 18:49:35 +00001162
Matt Arsenault966a94f2015-09-08 19:34:22 +00001163 if (isUInt<8>(DWordOffset0)) {
1164 SDLoc DL(Addr);
1165 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
1166
1167 // XXX - This is kind of hacky. Create a dummy sub node so we can check
1168 // the known bits in isDSOffsetLegal. We need to emit the selected node
1169 // here, so this is thrown away.
1170 SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
1171 Zero, Addr.getOperand(1));
1172
1173 if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
Tim Renoufcfdfba92019-03-18 19:35:44 +00001174 SmallVector<SDValue, 3> Opnds;
1175 Opnds.push_back(Zero);
1176 Opnds.push_back(Addr.getOperand(1));
1177 unsigned SubOp = AMDGPU::V_SUB_I32_e32;
1178 if (Subtarget->hasAddNoCarry()) {
1179 SubOp = AMDGPU::V_SUB_U32_e64;
Michael Liaoeea51772019-03-20 20:18:56 +00001180 Opnds.push_back(
1181 CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
Tim Renoufcfdfba92019-03-18 19:35:44 +00001182 }
Matt Arsenault84445dd2017-11-30 22:51:26 +00001183
Matt Arsenault966a94f2015-09-08 19:34:22 +00001184 MachineSDNode *MachineSub
Tim Renoufcfdfba92019-03-18 19:35:44 +00001185 = CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);
Matt Arsenault966a94f2015-09-08 19:34:22 +00001186
1187 Base = SDValue(MachineSub, 0);
1188 Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
1189 Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
1190 return true;
1191 }
1192 }
1193 }
1194 } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
Matt Arsenault1a74aff2014-10-15 18:06:43 +00001195 unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
1196 unsigned DWordOffset1 = DWordOffset0 + 1;
1197 assert(4 * DWordOffset0 == CAddr->getZExtValue());
1198
1199 if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001200 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
Matt Arsenault1a74aff2014-10-15 18:06:43 +00001201 MachineSDNode *MovZero
1202 = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001203 DL, MVT::i32, Zero);
Matt Arsenault1a74aff2014-10-15 18:06:43 +00001204 Base = SDValue(MovZero, 0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001205 Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
1206 Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
Matt Arsenault1a74aff2014-10-15 18:06:43 +00001207 return true;
1208 }
1209 }
1210
Tom Stellardf3fc5552014-08-22 18:49:35 +00001211 // default case
Matt Arsenault0efdd062016-09-09 22:29:28 +00001212
Tom Stellardf3fc5552014-08-22 18:49:35 +00001213 Base = Addr;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001214 Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
1215 Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
Tom Stellardf3fc5552014-08-22 18:49:35 +00001216 return true;
1217}
1218
Changpeng Fangb41574a2015-12-22 20:55:23 +00001219bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
Tom Stellard155bbb72014-08-11 22:18:17 +00001220 SDValue &VAddr, SDValue &SOffset,
1221 SDValue &Offset, SDValue &Offen,
1222 SDValue &Idxen, SDValue &Addr64,
1223 SDValue &GLC, SDValue &SLC,
Stanislav Mekhanoshina6322942019-04-30 22:08:23 +00001224 SDValue &TFE, SDValue &DLC) const {
Changpeng Fangb41574a2015-12-22 20:55:23 +00001225 // Subtarget prefers to use flat instruction
1226 if (Subtarget->useFlatForGlobal())
1227 return false;
1228
Tom Stellardb02c2682014-06-24 23:33:07 +00001229 SDLoc DL(Addr);
1230
Jan Vesely43b7b5b2016-04-07 19:23:11 +00001231 if (!GLC.getNode())
1232 GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
1233 if (!SLC.getNode())
1234 SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001235 TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
Stanislav Mekhanoshina6322942019-04-30 22:08:23 +00001236 DLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
Tom Stellard155bbb72014-08-11 22:18:17 +00001237
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001238 Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
1239 Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
1240 Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
1241 SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
Tom Stellard155bbb72014-08-11 22:18:17 +00001242
Tim Renouff1c7b922018-08-02 22:53:57 +00001243 ConstantSDNode *C1 = nullptr;
1244 SDValue N0 = Addr;
Tom Stellardb02c2682014-06-24 23:33:07 +00001245 if (CurDAG->isBaseWithConstantOffset(Addr)) {
Tim Renouff1c7b922018-08-02 22:53:57 +00001246 C1 = cast<ConstantSDNode>(Addr.getOperand(1));
1247 if (isUInt<32>(C1->getZExtValue()))
1248 N0 = Addr.getOperand(0);
1249 else
1250 C1 = nullptr;
Tom Stellardb02c2682014-06-24 23:33:07 +00001251 }
Tom Stellard94b72312015-02-11 00:34:35 +00001252
Tim Renouff1c7b922018-08-02 22:53:57 +00001253 if (N0.getOpcode() == ISD::ADD) {
1254 // (add N2, N3) -> addr64, or
1255 // (add (add N2, N3), C1) -> addr64
1256 SDValue N2 = N0.getOperand(0);
1257 SDValue N3 = N0.getOperand(1);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001258 Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
Tim Renouff1c7b922018-08-02 22:53:57 +00001259
1260 if (N2->isDivergent()) {
1261 if (N3->isDivergent()) {
1262 // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
1263 // addr64, and construct the resource from a 0 address.
1264 Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
1265 VAddr = N0;
1266 } else {
1267 // N2 is divergent, N3 is not.
1268 Ptr = N3;
1269 VAddr = N2;
1270 }
1271 } else {
1272 // N2 is not divergent.
1273 Ptr = N2;
1274 VAddr = N3;
1275 }
1276 Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1277 } else if (N0->isDivergent()) {
1278 // N0 is divergent. Use it as the addr64, and construct the resource from a
1279 // 0 address.
1280 Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
1281 VAddr = N0;
1282 Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
1283 } else {
1284 // N0 -> offset, or
1285 // (N0 + C1) -> offset
1286 VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
Tom Stellard155bbb72014-08-11 22:18:17 +00001287 Ptr = N0;
Tim Renouff1c7b922018-08-02 22:53:57 +00001288 }
1289
1290 if (!C1) {
1291 // No offset.
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001292 Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
Changpeng Fangb41574a2015-12-22 20:55:23 +00001293 return true;
Tom Stellardb02c2682014-06-24 23:33:07 +00001294 }
1295
Tim Renouff1c7b922018-08-02 22:53:57 +00001296 if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
1297 // Legal offset for instruction.
1298 Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
1299 return true;
1300 }
Changpeng Fangb41574a2015-12-22 20:55:23 +00001301
Tim Renouff1c7b922018-08-02 22:53:57 +00001302 // Illegal offset, store it in soffset.
1303 Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1304 SOffset =
1305 SDValue(CurDAG->getMachineNode(
1306 AMDGPU::S_MOV_B32, DL, MVT::i32,
1307 CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
1308 0);
Changpeng Fangb41574a2015-12-22 20:55:23 +00001309 return true;
Tom Stellard155bbb72014-08-11 22:18:17 +00001310}
1311
1312bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
Tom Stellardc53861a2015-02-11 00:34:32 +00001313 SDValue &VAddr, SDValue &SOffset,
Tom Stellard1f9939f2015-02-27 14:59:41 +00001314 SDValue &Offset, SDValue &GLC,
Stanislav Mekhanoshina6322942019-04-30 22:08:23 +00001315 SDValue &SLC, SDValue &TFE,
1316 SDValue &DLC) const {
Tom Stellard1f9939f2015-02-27 14:59:41 +00001317 SDValue Ptr, Offen, Idxen, Addr64;
Tom Stellard155bbb72014-08-11 22:18:17 +00001318
Tom Stellard70580f82015-07-20 14:28:41 +00001319 // addr64 bit was removed for volcanic islands.
1320 if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
1321 return false;
1322
Changpeng Fangb41574a2015-12-22 20:55:23 +00001323 if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
Stanislav Mekhanoshina6322942019-04-30 22:08:23 +00001324 GLC, SLC, TFE, DLC))
Changpeng Fangb41574a2015-12-22 20:55:23 +00001325 return false;
Tom Stellard155bbb72014-08-11 22:18:17 +00001326
1327 ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
1328 if (C->getSExtValue()) {
1329 SDLoc DL(Addr);
Matt Arsenault485defe2014-11-05 19:01:17 +00001330
1331 const SITargetLowering& Lowering =
1332 *static_cast<const SITargetLowering*>(getTargetLowering());
1333
1334 SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
Tom Stellard155bbb72014-08-11 22:18:17 +00001335 return true;
1336 }
Matt Arsenault485defe2014-11-05 19:01:17 +00001337
Tom Stellard155bbb72014-08-11 22:18:17 +00001338 return false;
1339}
1340
Tom Stellard7980fc82014-09-25 18:30:26 +00001341bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
Tom Stellardc53861a2015-02-11 00:34:32 +00001342 SDValue &VAddr, SDValue &SOffset,
NAKAMURA Takumi0a7d0ad2015-09-22 11:15:07 +00001343 SDValue &Offset,
1344 SDValue &SLC) const {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001345 SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
Stanislav Mekhanoshina6322942019-04-30 22:08:23 +00001346 SDValue GLC, TFE, DLC;
Tom Stellard7980fc82014-09-25 18:30:26 +00001347
Stanislav Mekhanoshina6322942019-04-30 22:08:23 +00001348 return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE, DLC);
Tom Stellard7980fc82014-09-25 18:30:26 +00001349}
1350
Matt Arsenault156d3ae2017-05-17 21:02:58 +00001351static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
1352 auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
1353 return PSV && PSV->isStack();
Matt Arsenaultac0fc842016-09-17 16:09:55 +00001354}
1355
Matt Arsenault156d3ae2017-05-17 21:02:58 +00001356std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
1357 const MachineFunction &MF = CurDAG->getMachineFunction();
1358 const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1359
1360 if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
1361 SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
1362 FI->getValueType(0));
1363
1364 // If we can resolve this to a frame index access, this is relative to the
1365 // frame pointer SGPR.
1366 return std::make_pair(TFI, CurDAG->getRegister(Info->getFrameOffsetReg(),
1367 MVT::i32));
1368 }
1369
1370 // If we don't know this private access is a local stack object, it needs to
1371 // be relative to the entry point's scratch wave offset register.
1372 return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(),
1373 MVT::i32));
1374}
1375
Matt Arsenaultb81495d2017-09-20 05:01:53 +00001376bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
Matt Arsenault156d3ae2017-05-17 21:02:58 +00001377 SDValue Addr, SDValue &Rsrc,
Matt Arsenault0774ea22017-04-24 19:40:59 +00001378 SDValue &VAddr, SDValue &SOffset,
1379 SDValue &ImmOffset) const {
Tom Stellardb02094e2014-07-21 15:45:01 +00001380
1381 SDLoc DL(Addr);
1382 MachineFunction &MF = CurDAG->getMachineFunction();
Matt Arsenault0e3d3892015-11-30 21:15:53 +00001383 const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
Tom Stellardb02094e2014-07-21 15:45:01 +00001384
Matt Arsenault0e3d3892015-11-30 21:15:53 +00001385 Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
Tom Stellardb02094e2014-07-21 15:45:01 +00001386
Matt Arsenault0774ea22017-04-24 19:40:59 +00001387 if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
1388 unsigned Imm = CAddr->getZExtValue();
Matt Arsenault0774ea22017-04-24 19:40:59 +00001389
1390 SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
1391 MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
1392 DL, MVT::i32, HighBits);
1393 VAddr = SDValue(MovHighBits, 0);
Matt Arsenault156d3ae2017-05-17 21:02:58 +00001394
1395 // In a call sequence, stores to the argument stack area are relative to the
1396 // stack pointer.
Matt Arsenaultb81495d2017-09-20 05:01:53 +00001397 const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
Matt Arsenault156d3ae2017-05-17 21:02:58 +00001398 unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
1399 Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();
1400
1401 SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
Matt Arsenault0774ea22017-04-24 19:40:59 +00001402 ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
1403 return true;
1404 }
1405
Tom Stellardb02094e2014-07-21 15:45:01 +00001406 if (CurDAG->isBaseWithConstantOffset(Addr)) {
Matt Arsenault0774ea22017-04-24 19:40:59 +00001407 // (add n0, c1)
1408
Tom Stellard78655fc2015-07-16 19:40:09 +00001409 SDValue N0 = Addr.getOperand(0);
Tom Stellardb02094e2014-07-21 15:45:01 +00001410 SDValue N1 = Addr.getOperand(1);
Matt Arsenaultcd099612016-02-24 04:55:29 +00001411
Matt Arsenaultcaf0ed42017-11-30 00:52:40 +00001412 // Offsets in vaddr must be positive if range checking is enabled.
Matt Arsenault45b98182017-11-15 00:45:43 +00001413 //
Matt Arsenaultcaf0ed42017-11-30 00:52:40 +00001414 // The total computation of vaddr + soffset + offset must not overflow. If
1415 // vaddr is negative, even if offset is 0 the sgpr offset add will end up
Matt Arsenault45b98182017-11-15 00:45:43 +00001416 // overflowing.
Matt Arsenaultcaf0ed42017-11-30 00:52:40 +00001417 //
1418 // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
1419 // always perform a range check. If a negative vaddr base index was used,
1420 // this would fail the range check. The overall address computation would
1421 // compute a valid address, but this doesn't happen due to the range
1422 // check. For out-of-bounds MUBUF loads, a 0 is returned.
1423 //
1424 // Therefore it should be safe to fold any VGPR offset on gfx9 into the
1425 // MUBUF vaddr, but not on older subtargets which can only do this if the
1426 // sign bit is known 0.
Matt Arsenaultcd099612016-02-24 04:55:29 +00001427 ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
Matt Arsenault45b98182017-11-15 00:45:43 +00001428 if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
Matt Arsenaultcaf0ed42017-11-30 00:52:40 +00001429 (!Subtarget->privateMemoryResourceIsRangeChecked() ||
1430 CurDAG->SignBitIsZero(N0))) {
Matt Arsenault156d3ae2017-05-17 21:02:58 +00001431 std::tie(VAddr, SOffset) = foldFrameIndex(N0);
Matt Arsenaultcd099612016-02-24 04:55:29 +00001432 ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
1433 return true;
Tom Stellardb02094e2014-07-21 15:45:01 +00001434 }
1435 }
1436
Tom Stellardb02094e2014-07-21 15:45:01 +00001437 // (node)
Matt Arsenault156d3ae2017-05-17 21:02:58 +00001438 std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001439 ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
Tom Stellardb02094e2014-07-21 15:45:01 +00001440 return true;
1441}
1442
Matt Arsenaultb81495d2017-09-20 05:01:53 +00001443bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
Matt Arsenault156d3ae2017-05-17 21:02:58 +00001444 SDValue Addr,
Matt Arsenault0774ea22017-04-24 19:40:59 +00001445 SDValue &SRsrc,
1446 SDValue &SOffset,
1447 SDValue &Offset) const {
1448 ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
Marek Olsakffadcb72017-11-09 01:52:17 +00001449 if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
Matt Arsenault0774ea22017-04-24 19:40:59 +00001450 return false;
1451
1452 SDLoc DL(Addr);
1453 MachineFunction &MF = CurDAG->getMachineFunction();
1454 const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1455
1456 SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
Matt Arsenault156d3ae2017-05-17 21:02:58 +00001457
Matt Arsenaultb81495d2017-09-20 05:01:53 +00001458 const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
Matt Arsenault156d3ae2017-05-17 21:02:58 +00001459 unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
1460 Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();
1461
1462 // FIXME: Get from MachinePointerInfo? We should only be using the frame
1463 // offset if we know this is in a call sequence.
1464 SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
1465
Matt Arsenault0774ea22017-04-24 19:40:59 +00001466 Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
1467 return true;
1468}
1469
Tom Stellard155bbb72014-08-11 22:18:17 +00001470bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1471 SDValue &SOffset, SDValue &Offset,
1472 SDValue &GLC, SDValue &SLC,
Stanislav Mekhanoshina6322942019-04-30 22:08:23 +00001473 SDValue &TFE, SDValue &DLC) const {
Tom Stellard155bbb72014-08-11 22:18:17 +00001474 SDValue Ptr, VAddr, Offen, Idxen, Addr64;
Tom Stellard794c8c02014-12-02 17:05:41 +00001475 const SIInstrInfo *TII =
Eric Christopher7792e322015-01-30 23:24:40 +00001476 static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
Tom Stellardb02094e2014-07-21 15:45:01 +00001477
Changpeng Fangb41574a2015-12-22 20:55:23 +00001478 if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
Stanislav Mekhanoshina6322942019-04-30 22:08:23 +00001479 GLC, SLC, TFE, DLC))
Changpeng Fangb41574a2015-12-22 20:55:23 +00001480 return false;
Tom Stellardb02094e2014-07-21 15:45:01 +00001481
Tom Stellard155bbb72014-08-11 22:18:17 +00001482 if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
1483 !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
1484 !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
Tom Stellard794c8c02014-12-02 17:05:41 +00001485 uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
Tom Stellard155bbb72014-08-11 22:18:17 +00001486 APInt::getAllOnesValue(32).getZExtValue(); // Size
1487 SDLoc DL(Addr);
Matt Arsenaultf3cd4512014-11-05 19:01:19 +00001488
1489 const SITargetLowering& Lowering =
1490 *static_cast<const SITargetLowering*>(getTargetLowering());
1491
1492 SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
Tom Stellard155bbb72014-08-11 22:18:17 +00001493 return true;
1494 }
1495 return false;
Tom Stellardb02094e2014-07-21 15:45:01 +00001496}
1497
Tom Stellard7980fc82014-09-25 18:30:26 +00001498bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
Jan Vesely43b7b5b2016-04-07 19:23:11 +00001499 SDValue &Soffset, SDValue &Offset
1500 ) const {
Stanislav Mekhanoshina6322942019-04-30 22:08:23 +00001501 SDValue GLC, SLC, TFE, DLC;
Jan Vesely43b7b5b2016-04-07 19:23:11 +00001502
Stanislav Mekhanoshina6322942019-04-30 22:08:23 +00001503 return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC);
Jan Vesely43b7b5b2016-04-07 19:23:11 +00001504}
1505bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
Tom Stellard7980fc82014-09-25 18:30:26 +00001506 SDValue &Soffset, SDValue &Offset,
Matt Arsenault88701812016-06-09 23:42:48 +00001507 SDValue &SLC) const {
Stanislav Mekhanoshina6322942019-04-30 22:08:23 +00001508 SDValue GLC, TFE, DLC;
Tom Stellard7980fc82014-09-25 18:30:26 +00001509
Stanislav Mekhanoshina6322942019-04-30 22:08:23 +00001510 return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC);
Tom Stellard7980fc82014-09-25 18:30:26 +00001511}
1512
Matt Arsenault4e309b02017-07-29 01:03:53 +00001513template <bool IsSigned>
Stanislav Mekhanoshina6322942019-04-30 22:08:23 +00001514bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N,
1515 SDValue Addr,
Matt Arsenaultdb7c6a82017-06-12 16:53:51 +00001516 SDValue &VAddr,
1517 SDValue &Offset,
1518 SDValue &SLC) const {
Stanislav Mekhanoshina6322942019-04-30 22:08:23 +00001519 return static_cast<const SITargetLowering*>(getTargetLowering())->
1520 SelectFlatOffset(IsSigned, *CurDAG, N, Addr, VAddr, Offset, SLC);
Matt Arsenault7757c592016-06-09 23:42:54 +00001521}
1522
Stanislav Mekhanoshina6322942019-04-30 22:08:23 +00001523bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDNode *N,
1524 SDValue Addr,
Matt Arsenaultdb7c6a82017-06-12 16:53:51 +00001525 SDValue &VAddr,
1526 SDValue &Offset,
1527 SDValue &SLC) const {
Stanislav Mekhanoshina6322942019-04-30 22:08:23 +00001528 return SelectFlatOffset<false>(N, Addr, VAddr, Offset, SLC);
Matt Arsenault4e309b02017-07-29 01:03:53 +00001529}
1530
Stanislav Mekhanoshina6322942019-04-30 22:08:23 +00001531bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDNode *N,
1532 SDValue Addr,
Matt Arsenault4e309b02017-07-29 01:03:53 +00001533 SDValue &VAddr,
1534 SDValue &Offset,
1535 SDValue &SLC) const {
Stanislav Mekhanoshina6322942019-04-30 22:08:23 +00001536 return SelectFlatOffset<true>(N, Addr, VAddr, Offset, SLC);
Matt Arsenaultdb7c6a82017-06-12 16:53:51 +00001537}
1538
Tom Stellarddee26a22015-08-06 19:28:30 +00001539bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
1540 SDValue &Offset, bool &Imm) const {
1541
1542 // FIXME: Handle non-constant offsets.
1543 ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
1544 if (!C)
1545 return false;
1546
1547 SDLoc SL(ByteOffsetNode);
Tom Stellard5bfbae52018-07-11 20:59:01 +00001548 GCNSubtarget::Generation Gen = Subtarget->getGeneration();
Tom Stellarddee26a22015-08-06 19:28:30 +00001549 int64_t ByteOffset = C->getSExtValue();
Tom Stellard08efb7e2017-01-27 18:41:14 +00001550 int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);
Tom Stellarddee26a22015-08-06 19:28:30 +00001551
Tom Stellard08efb7e2017-01-27 18:41:14 +00001552 if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) {
Tom Stellarddee26a22015-08-06 19:28:30 +00001553 Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1554 Imm = true;
1555 return true;
1556 }
1557
Tom Stellard217361c2015-08-06 19:28:38 +00001558 if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
1559 return false;
1560
Marek Olsak8973a0a2017-05-24 14:53:50 +00001561 if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
1562 // 32-bit Immediates are supported on Sea Islands.
Tom Stellard217361c2015-08-06 19:28:38 +00001563 Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1564 } else {
Tom Stellarddee26a22015-08-06 19:28:30 +00001565 SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
1566 Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
1567 C32Bit), 0);
Tom Stellarddee26a22015-08-06 19:28:30 +00001568 }
Tom Stellard217361c2015-08-06 19:28:38 +00001569 Imm = false;
1570 return true;
Tom Stellarddee26a22015-08-06 19:28:30 +00001571}
1572
Matt Arsenault923712b2018-02-09 16:57:57 +00001573SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
1574 if (Addr.getValueType() != MVT::i32)
1575 return Addr;
1576
1577 // Zero-extend a 32-bit address.
1578 SDLoc SL(Addr);
1579
1580 const MachineFunction &MF = CurDAG->getMachineFunction();
1581 const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1582 unsigned AddrHiVal = Info->get32BitAddressHighBits();
1583 SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);
1584
1585 const SDValue Ops[] = {
1586 CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
1587 Addr,
1588 CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
1589 SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
1590 0),
1591 CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
1592 };
1593
1594 return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
1595 Ops), 0);
1596}
1597
Tom Stellarddee26a22015-08-06 19:28:30 +00001598bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
1599 SDValue &Offset, bool &Imm) const {
Tom Stellarddee26a22015-08-06 19:28:30 +00001600 SDLoc SL(Addr);
Matt Arsenault923712b2018-02-09 16:57:57 +00001601
Marek Olsak3fc20792018-08-29 20:03:00 +00001602 // A 32-bit (address + offset) should not cause unsigned 32-bit integer
1603 // wraparound, because s_load instructions perform the addition in 64 bits.
1604 if ((Addr.getValueType() != MVT::i32 ||
1605 Addr->getFlags().hasNoUnsignedWrap()) &&
1606 CurDAG->isBaseWithConstantOffset(Addr)) {
Tom Stellarddee26a22015-08-06 19:28:30 +00001607 SDValue N0 = Addr.getOperand(0);
1608 SDValue N1 = Addr.getOperand(1);
1609
1610 if (SelectSMRDOffset(N1, Offset, Imm)) {
Matt Arsenault923712b2018-02-09 16:57:57 +00001611 SBase = Expand32BitAddress(N0);
Tom Stellarddee26a22015-08-06 19:28:30 +00001612 return true;
1613 }
1614 }
Matt Arsenault923712b2018-02-09 16:57:57 +00001615 SBase = Expand32BitAddress(Addr);
Tom Stellarddee26a22015-08-06 19:28:30 +00001616 Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
1617 Imm = true;
1618 return true;
1619}
1620
1621bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
1622 SDValue &Offset) const {
1623 bool Imm;
Marek Olsak8973a0a2017-05-24 14:53:50 +00001624 return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
1625}
Tom Stellarddee26a22015-08-06 19:28:30 +00001626
Marek Olsak8973a0a2017-05-24 14:53:50 +00001627bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
1628 SDValue &Offset) const {
1629
1630 if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1631 return false;
1632
1633 bool Imm;
Tom Stellard217361c2015-08-06 19:28:38 +00001634 if (!SelectSMRD(Addr, SBase, Offset, Imm))
1635 return false;
1636
Marek Olsak8973a0a2017-05-24 14:53:50 +00001637 return !Imm && isa<ConstantSDNode>(Offset);
Tom Stellard217361c2015-08-06 19:28:38 +00001638}
1639
Tom Stellarddee26a22015-08-06 19:28:30 +00001640bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
1641 SDValue &Offset) const {
1642 bool Imm;
Tom Stellard217361c2015-08-06 19:28:38 +00001643 return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
1644 !isa<ConstantSDNode>(Offset);
Tom Stellarddee26a22015-08-06 19:28:30 +00001645}
1646
1647bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
1648 SDValue &Offset) const {
1649 bool Imm;
Marek Olsak8973a0a2017-05-24 14:53:50 +00001650 return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
1651}
Tom Stellarddee26a22015-08-06 19:28:30 +00001652
Marek Olsak8973a0a2017-05-24 14:53:50 +00001653bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
1654 SDValue &Offset) const {
1655 if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1656 return false;
1657
1658 bool Imm;
Tom Stellard217361c2015-08-06 19:28:38 +00001659 if (!SelectSMRDOffset(Addr, Offset, Imm))
1660 return false;
1661
Marek Olsak8973a0a2017-05-24 14:53:50 +00001662 return !Imm && isa<ConstantSDNode>(Offset);
Tom Stellard217361c2015-08-06 19:28:38 +00001663}
1664
Nicolai Haehnle7968c342016-07-12 08:12:16 +00001665bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
1666 SDValue &Base,
1667 SDValue &Offset) const {
Matt Arsenault1322b6f2016-07-09 01:13:56 +00001668 SDLoc DL(Index);
1669
1670 if (CurDAG->isBaseWithConstantOffset(Index)) {
1671 SDValue N0 = Index.getOperand(0);
1672 SDValue N1 = Index.getOperand(1);
1673 ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1674
1675 // (add n0, c0)
Changpeng Fang6f539292018-12-21 20:57:34 +00001676 // Don't peel off the offset (c0) if doing so could possibly lead
1677 // the base (n0) to be negative.
1678 if (C1->getSExtValue() <= 0 || CurDAG->SignBitIsZero(N0)) {
1679 Base = N0;
1680 Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
1681 return true;
1682 }
Matt Arsenault1322b6f2016-07-09 01:13:56 +00001683 }
1684
Nicolai Haehnle7968c342016-07-12 08:12:16 +00001685 if (isa<ConstantSDNode>(Index))
1686 return false;
Matt Arsenault1322b6f2016-07-09 01:13:56 +00001687
1688 Base = Index;
1689 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1690 return true;
1691}
1692
Benjamin Kramerbdc49562016-06-12 15:39:02 +00001693SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
1694 SDValue Val, uint32_t Offset,
1695 uint32_t Width) {
Marek Olsak9b728682015-03-24 13:40:27 +00001696 // Transformation function, pack the offset and width of a BFE into
1697 // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
1698 // source, bits [5:0] contain the offset and bits [22:16] the width.
1699 uint32_t PackedVal = Offset | (Width << 16);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001700 SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
Marek Olsak9b728682015-03-24 13:40:27 +00001701
1702 return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
1703}
1704
Justin Bogner95927c02016-05-12 21:03:32 +00001705void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
Marek Olsak9b728682015-03-24 13:40:27 +00001706 // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
1707 // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
1708 // Predicate: 0 < b <= c < 32
1709
1710 const SDValue &Shl = N->getOperand(0);
1711 ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
1712 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
1713
1714 if (B && C) {
1715 uint32_t BVal = B->getZExtValue();
1716 uint32_t CVal = C->getZExtValue();
1717
1718 if (0 < BVal && BVal <= CVal && CVal < 32) {
1719 bool Signed = N->getOpcode() == ISD::SRA;
1720 unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
1721
Justin Bogner95927c02016-05-12 21:03:32 +00001722 ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
1723 32 - CVal));
1724 return;
Marek Olsak9b728682015-03-24 13:40:27 +00001725 }
1726 }
Justin Bogner95927c02016-05-12 21:03:32 +00001727 SelectCode(N);
Marek Olsak9b728682015-03-24 13:40:27 +00001728}
1729
Justin Bogner95927c02016-05-12 21:03:32 +00001730void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
Marek Olsak9b728682015-03-24 13:40:27 +00001731 switch (N->getOpcode()) {
1732 case ISD::AND:
1733 if (N->getOperand(0).getOpcode() == ISD::SRL) {
1734 // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
1735 // Predicate: isMask(mask)
1736 const SDValue &Srl = N->getOperand(0);
1737 ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
1738 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
1739
1740 if (Shift && Mask) {
1741 uint32_t ShiftVal = Shift->getZExtValue();
1742 uint32_t MaskVal = Mask->getZExtValue();
1743
1744 if (isMask_32(MaskVal)) {
1745 uint32_t WidthVal = countPopulation(MaskVal);
1746
Justin Bogner95927c02016-05-12 21:03:32 +00001747 ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
1748 Srl.getOperand(0), ShiftVal, WidthVal));
1749 return;
Marek Olsak9b728682015-03-24 13:40:27 +00001750 }
1751 }
1752 }
1753 break;
1754 case ISD::SRL:
1755 if (N->getOperand(0).getOpcode() == ISD::AND) {
1756 // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
1757 // Predicate: isMask(mask >> b)
1758 const SDValue &And = N->getOperand(0);
1759 ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
1760 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
1761
1762 if (Shift && Mask) {
1763 uint32_t ShiftVal = Shift->getZExtValue();
1764 uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
1765
1766 if (isMask_32(MaskVal)) {
1767 uint32_t WidthVal = countPopulation(MaskVal);
1768
Justin Bogner95927c02016-05-12 21:03:32 +00001769 ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
1770 And.getOperand(0), ShiftVal, WidthVal));
1771 return;
Marek Olsak9b728682015-03-24 13:40:27 +00001772 }
1773 }
Justin Bogner95927c02016-05-12 21:03:32 +00001774 } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
1775 SelectS_BFEFromShifts(N);
1776 return;
1777 }
Marek Olsak9b728682015-03-24 13:40:27 +00001778 break;
1779 case ISD::SRA:
Justin Bogner95927c02016-05-12 21:03:32 +00001780 if (N->getOperand(0).getOpcode() == ISD::SHL) {
1781 SelectS_BFEFromShifts(N);
1782 return;
1783 }
Marek Olsak9b728682015-03-24 13:40:27 +00001784 break;
Matt Arsenault7e8de012016-04-22 22:59:16 +00001785
1786 case ISD::SIGN_EXTEND_INREG: {
1787 // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
1788 SDValue Src = N->getOperand(0);
1789 if (Src.getOpcode() != ISD::SRL)
1790 break;
1791
1792 const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
1793 if (!Amt)
1794 break;
1795
1796 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
Justin Bogner95927c02016-05-12 21:03:32 +00001797 ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
1798 Amt->getZExtValue(), Width));
1799 return;
Matt Arsenault7e8de012016-04-22 22:59:16 +00001800 }
Marek Olsak9b728682015-03-24 13:40:27 +00001801 }
1802
Justin Bogner95927c02016-05-12 21:03:32 +00001803 SelectCode(N);
Marek Olsak9b728682015-03-24 13:40:27 +00001804}
1805
Matt Arsenault7b1dc2c2016-09-17 02:02:19 +00001806bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
1807 assert(N->getOpcode() == ISD::BRCOND);
1808 if (!N->hasOneUse())
1809 return false;
1810
1811 SDValue Cond = N->getOperand(1);
1812 if (Cond.getOpcode() == ISD::CopyToReg)
1813 Cond = Cond.getOperand(2);
1814
1815 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
1816 return false;
1817
1818 MVT VT = Cond.getOperand(0).getSimpleValueType();
1819 if (VT == MVT::i32)
1820 return true;
1821
1822 if (VT == MVT::i64) {
Tom Stellard5bfbae52018-07-11 20:59:01 +00001823 auto ST = static_cast<const GCNSubtarget *>(Subtarget);
Matt Arsenault7b1dc2c2016-09-17 02:02:19 +00001824
1825 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
1826 return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
1827 }
1828
1829 return false;
1830}
1831
Justin Bogner95927c02016-05-12 21:03:32 +00001832void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
Tom Stellardbc4497b2016-02-12 23:45:29 +00001833 SDValue Cond = N->getOperand(1);
1834
Matt Arsenault327188a2016-12-15 21:57:11 +00001835 if (Cond.isUndef()) {
1836 CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
1837 N->getOperand(2), N->getOperand(0));
1838 return;
1839 }
1840
Matt Arsenaultd674e0a2017-10-10 20:34:49 +00001841 bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
1842 unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
1843 unsigned CondReg = UseSCCBr ? AMDGPU::SCC : AMDGPU::VCC;
Tom Stellardbc4497b2016-02-12 23:45:29 +00001844 SDLoc SL(N);
1845
Tim Renouf6eaad1e2018-01-09 21:34:43 +00001846 if (!UseSCCBr) {
1847 // This is the case that we are selecting to S_CBRANCH_VCCNZ. We have not
1848 // analyzed what generates the vcc value, so we do not know whether vcc
1849 // bits for disabled lanes are 0. Thus we need to mask out bits for
1850 // disabled lanes.
1851 //
1852 // For the case that we select S_CBRANCH_SCC1 and it gets
1853 // changed to S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls
1854 // SIInstrInfo::moveToVALU which inserts the S_AND).
1855 //
1856 // We could add an analysis of what generates the vcc value here and omit
1857 // the S_AND when is unnecessary. But it would be better to add a separate
1858 // pass after SIFixSGPRCopies to do the unnecessary S_AND removal, so it
1859 // catches both cases.
1860 Cond = SDValue(CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1,
1861 CurDAG->getRegister(AMDGPU::EXEC, MVT::i1),
1862 Cond),
1863 0);
1864 }
1865
Matt Arsenaultd674e0a2017-10-10 20:34:49 +00001866 SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
1867 CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
Justin Bogner95927c02016-05-12 21:03:32 +00001868 N->getOperand(2), // Basic Block
Matt Arsenaultf530e8b2016-11-07 19:09:33 +00001869 VCC.getValue(0));
Tom Stellardbc4497b2016-02-12 23:45:29 +00001870}
1871
Matt Arsenault0084adc2018-04-30 19:08:16 +00001872void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
Matt Arsenaultd7e23032017-09-07 18:05:07 +00001873 MVT VT = N->getSimpleValueType(0);
Matt Arsenault0084adc2018-04-30 19:08:16 +00001874 bool IsFMA = N->getOpcode() == ISD::FMA;
1875 if (VT != MVT::f32 || (!Subtarget->hasMadMixInsts() &&
1876 !Subtarget->hasFmaMixInsts()) ||
1877 ((IsFMA && Subtarget->hasMadMixInsts()) ||
1878 (!IsFMA && Subtarget->hasFmaMixInsts()))) {
Matt Arsenaultd7e23032017-09-07 18:05:07 +00001879 SelectCode(N);
1880 return;
1881 }
1882
1883 SDValue Src0 = N->getOperand(0);
1884 SDValue Src1 = N->getOperand(1);
1885 SDValue Src2 = N->getOperand(2);
1886 unsigned Src0Mods, Src1Mods, Src2Mods;
1887
Matt Arsenault0084adc2018-04-30 19:08:16 +00001888 // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand
1889 // using the conversion from f16.
Matt Arsenaultd7e23032017-09-07 18:05:07 +00001890 bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
1891 bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
1892 bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);
1893
Matt Arsenault0084adc2018-04-30 19:08:16 +00001894 assert((IsFMA || !Subtarget->hasFP32Denormals()) &&
Matt Arsenaultd7e23032017-09-07 18:05:07 +00001895 "fmad selected with denormals enabled");
1896 // TODO: We can select this with f32 denormals enabled if all the sources are
1897 // converted from f16 (in which case fmad isn't legal).
1898
1899 if (Sel0 || Sel1 || Sel2) {
1900 // For dummy operands.
1901 SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
1902 SDValue Ops[] = {
1903 CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
1904 CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
1905 CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
1906 CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
1907 Zero, Zero
1908 };
1909
Matt Arsenault0084adc2018-04-30 19:08:16 +00001910 CurDAG->SelectNodeTo(N,
1911 IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32,
1912 MVT::f32, Ops);
Matt Arsenaultd7e23032017-09-07 18:05:07 +00001913 } else {
1914 SelectCode(N);
1915 }
1916}
1917
Matt Arsenault88701812016-06-09 23:42:48 +00001918// This is here because there isn't a way to use the generated sub0_sub1 as the
1919// subreg index to EXTRACT_SUBREG in tablegen.
1920void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
1921 MemSDNode *Mem = cast<MemSDNode>(N);
1922 unsigned AS = Mem->getAddressSpace();
Matt Arsenault0da63502018-08-31 05:49:54 +00001923 if (AS == AMDGPUAS::FLAT_ADDRESS) {
Matt Arsenault7757c592016-06-09 23:42:54 +00001924 SelectCode(N);
1925 return;
1926 }
Matt Arsenault88701812016-06-09 23:42:48 +00001927
1928 MVT VT = N->getSimpleValueType(0);
1929 bool Is32 = (VT == MVT::i32);
1930 SDLoc SL(N);
1931
1932 MachineSDNode *CmpSwap = nullptr;
1933 if (Subtarget->hasAddr64()) {
Vitaly Buka74503982017-10-15 05:35:02 +00001934 SDValue SRsrc, VAddr, SOffset, Offset, SLC;
Matt Arsenault88701812016-06-09 23:42:48 +00001935
1936 if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
Matt Arsenaulte5456ce2017-07-20 21:06:04 +00001937 unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
1938 AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
Matt Arsenault88701812016-06-09 23:42:48 +00001939 SDValue CmpVal = Mem->getOperand(2);
1940
1941 // XXX - Do we care about glue operands?
1942
1943 SDValue Ops[] = {
1944 CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
1945 };
1946
1947 CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
1948 }
1949 }
1950
1951 if (!CmpSwap) {
1952 SDValue SRsrc, SOffset, Offset, SLC;
1953 if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
Matt Arsenaulte5456ce2017-07-20 21:06:04 +00001954 unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
1955 AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;
Matt Arsenault88701812016-06-09 23:42:48 +00001956
1957 SDValue CmpVal = Mem->getOperand(2);
1958 SDValue Ops[] = {
1959 CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
1960 };
1961
1962 CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
1963 }
1964 }
1965
1966 if (!CmpSwap) {
1967 SelectCode(N);
1968 return;
1969 }
1970
Chandler Carruth66654b72018-08-14 23:30:32 +00001971 MachineMemOperand *MMO = Mem->getMemOperand();
1972 CurDAG->setNodeMemRefs(CmpSwap, {MMO});
Matt Arsenault88701812016-06-09 23:42:48 +00001973
1974 unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
1975 SDValue Extract
1976 = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));
1977
1978 ReplaceUses(SDValue(N, 0), Extract);
1979 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
1980 CurDAG->RemoveDeadNode(N);
1981}
1982
Matt Arsenaultcdd191d2019-01-28 20:14:49 +00001983void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
1984 unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
1985 if ((IntrID != Intrinsic::amdgcn_ds_append &&
1986 IntrID != Intrinsic::amdgcn_ds_consume) ||
1987 N->getValueType(0) != MVT::i32) {
1988 SelectCode(N);
1989 return;
1990 }
1991
1992 // The address is assumed to be uniform, so if it ends up in a VGPR, it will
1993 // be copied to an SGPR with readfirstlane.
1994 unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
1995 AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
1996
1997 SDValue Chain = N->getOperand(0);
1998 SDValue Ptr = N->getOperand(2);
1999 MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
2000 bool IsGDS = M->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
2001
2002 SDValue Offset;
2003 if (CurDAG->isBaseWithConstantOffset(Ptr)) {
2004 SDValue PtrBase = Ptr.getOperand(0);
2005 SDValue PtrOffset = Ptr.getOperand(1);
2006
2007 const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue();
2008 if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue(), 16)) {
2009 N = glueCopyToM0(N, PtrBase);
2010 Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
2011 }
2012 }
2013
2014 if (!Offset) {
2015 N = glueCopyToM0(N, Ptr);
2016 Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2017 }
2018
2019 SDValue Ops[] = {
2020 Offset,
2021 CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
2022 Chain,
2023 N->getOperand(N->getNumOperands() - 1) // New glue
2024 };
2025
2026 CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2027}
2028
Matt Arsenaultd7e23032017-09-07 18:05:07 +00002029bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
2030 unsigned &Mods) const {
2031 Mods = 0;
Tom Stellardb4a313a2014-08-01 00:32:39 +00002032 Src = In;
2033
2034 if (Src.getOpcode() == ISD::FNEG) {
2035 Mods |= SISrcMods::NEG;
2036 Src = Src.getOperand(0);
2037 }
2038
2039 if (Src.getOpcode() == ISD::FABS) {
2040 Mods |= SISrcMods::ABS;
2041 Src = Src.getOperand(0);
2042 }
2043
Tom Stellardb4a313a2014-08-01 00:32:39 +00002044 return true;
2045}
2046
Matt Arsenaultd7e23032017-09-07 18:05:07 +00002047bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
2048 SDValue &SrcMods) const {
2049 unsigned Mods;
2050 if (SelectVOP3ModsImpl(In, Src, Mods)) {
2051 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2052 return true;
2053 }
2054
2055 return false;
2056}
2057
Matt Arsenaultf84e5d92017-01-31 03:07:46 +00002058bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
2059 SDValue &SrcMods) const {
2060 SelectVOP3Mods(In, Src, SrcMods);
2061 return isNoNanSrc(Src);
2062}
2063
Matt Arsenaultdf58e822017-04-25 21:17:38 +00002064bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
2065 if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
2066 return false;
2067
2068 Src = In;
2069 return true;
Tom Stellarddb5a11f2015-07-13 15:47:57 +00002070}
2071
Tom Stellardb4a313a2014-08-01 00:32:39 +00002072bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
2073 SDValue &SrcMods, SDValue &Clamp,
2074 SDValue &Omod) const {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002075 SDLoc DL(In);
Matt Arsenaultdf58e822017-04-25 21:17:38 +00002076 Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2077 Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
Tom Stellardb4a313a2014-08-01 00:32:39 +00002078
2079 return SelectVOP3Mods(In, Src, SrcMods);
2080}
2081
Matt Arsenault4831ce52015-01-06 23:00:37 +00002082bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
2083 SDValue &SrcMods,
2084 SDValue &Clamp,
2085 SDValue &Omod) const {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002086 Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
Matt Arsenault4831ce52015-01-06 23:00:37 +00002087 return SelectVOP3Mods(In, Src, SrcMods);
2088}
2089
Dmitry Preobrazhenskyc512d442017-03-27 15:57:17 +00002090bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
2091 SDValue &Clamp, SDValue &Omod) const {
2092 Src = In;
2093
2094 SDLoc DL(In);
Matt Arsenaultdf58e822017-04-25 21:17:38 +00002095 Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2096 Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
Dmitry Preobrazhenskyc512d442017-03-27 15:57:17 +00002097
2098 return true;
2099}
2100
Matt Arsenaulteb522e62017-02-27 22:15:25 +00002101bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
2102 SDValue &SrcMods) const {
2103 unsigned Mods = 0;
2104 Src = In;
2105
Matt Arsenaulteb522e62017-02-27 22:15:25 +00002106 if (Src.getOpcode() == ISD::FNEG) {
Matt Arsenault786eeea2017-05-17 20:00:00 +00002107 Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
Matt Arsenaulteb522e62017-02-27 22:15:25 +00002108 Src = Src.getOperand(0);
2109 }
2110
Matt Arsenault786eeea2017-05-17 20:00:00 +00002111 if (Src.getOpcode() == ISD::BUILD_VECTOR) {
2112 unsigned VecMods = Mods;
2113
Matt Arsenault98f29462017-05-17 20:30:58 +00002114 SDValue Lo = stripBitcast(Src.getOperand(0));
2115 SDValue Hi = stripBitcast(Src.getOperand(1));
Matt Arsenault786eeea2017-05-17 20:00:00 +00002116
2117 if (Lo.getOpcode() == ISD::FNEG) {
Matt Arsenault98f29462017-05-17 20:30:58 +00002118 Lo = stripBitcast(Lo.getOperand(0));
Matt Arsenault786eeea2017-05-17 20:00:00 +00002119 Mods ^= SISrcMods::NEG;
2120 }
2121
2122 if (Hi.getOpcode() == ISD::FNEG) {
Matt Arsenault98f29462017-05-17 20:30:58 +00002123 Hi = stripBitcast(Hi.getOperand(0));
Matt Arsenault786eeea2017-05-17 20:00:00 +00002124 Mods ^= SISrcMods::NEG_HI;
2125 }
2126
Matt Arsenault98f29462017-05-17 20:30:58 +00002127 if (isExtractHiElt(Lo, Lo))
2128 Mods |= SISrcMods::OP_SEL_0;
2129
2130 if (isExtractHiElt(Hi, Hi))
2131 Mods |= SISrcMods::OP_SEL_1;
2132
2133 Lo = stripExtractLoElt(Lo);
2134 Hi = stripExtractLoElt(Hi);
2135
Matt Arsenault786eeea2017-05-17 20:00:00 +00002136 if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
2137 // Really a scalar input. Just select from the low half of the register to
2138 // avoid packing.
2139
2140 Src = Lo;
2141 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2142 return true;
2143 }
2144
2145 Mods = VecMods;
2146 }
2147
Matt Arsenaulteb522e62017-02-27 22:15:25 +00002148 // Packed instructions do not have abs modifiers.
Matt Arsenaulteb522e62017-02-27 22:15:25 +00002149 Mods |= SISrcMods::OP_SEL_1;
2150
2151 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2152 return true;
2153}
2154
2155bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src,
2156 SDValue &SrcMods,
2157 SDValue &Clamp) const {
2158 SDLoc SL(In);
2159
2160 // FIXME: Handle clamp and op_sel
2161 Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
2162
2163 return SelectVOP3PMods(In, Src, SrcMods);
2164}
2165
Dmitry Preobrazhenskyabf28392017-07-21 13:54:11 +00002166bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
2167 SDValue &SrcMods) const {
2168 Src = In;
2169 // FIXME: Handle op_sel
2170 SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
2171 return true;
2172}
2173
2174bool AMDGPUDAGToDAGISel::SelectVOP3OpSel0(SDValue In, SDValue &Src,
2175 SDValue &SrcMods,
2176 SDValue &Clamp) const {
2177 SDLoc SL(In);
2178
2179 // FIXME: Handle clamp
2180 Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
2181
2182 return SelectVOP3OpSel(In, Src, SrcMods);
2183}
2184
2185bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
2186 SDValue &SrcMods) const {
2187 // FIXME: Handle op_sel
2188 return SelectVOP3Mods(In, Src, SrcMods);
2189}
2190
2191bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src,
2192 SDValue &SrcMods,
2193 SDValue &Clamp) const {
2194 SDLoc SL(In);
2195
2196 // FIXME: Handle clamp
2197 Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
2198
2199 return SelectVOP3OpSelMods(In, Src, SrcMods);
2200}
2201
Matt Arsenaultd7e23032017-09-07 18:05:07 +00002202// The return value is not whether the match is possible (which it always is),
2203// but whether or not it a conversion is really used.
2204bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
2205 unsigned &Mods) const {
2206 Mods = 0;
2207 SelectVOP3ModsImpl(In, Src, Mods);
2208
2209 if (Src.getOpcode() == ISD::FP_EXTEND) {
2210 Src = Src.getOperand(0);
2211 assert(Src.getValueType() == MVT::f16);
2212 Src = stripBitcast(Src);
2213
Matt Arsenault550c66d2017-10-13 20:45:49 +00002214 // Be careful about folding modifiers if we already have an abs. fneg is
2215 // applied last, so we don't want to apply an earlier fneg.
2216 if ((Mods & SISrcMods::ABS) == 0) {
2217 unsigned ModsTmp;
2218 SelectVOP3ModsImpl(Src, Src, ModsTmp);
2219
2220 if ((ModsTmp & SISrcMods::NEG) != 0)
2221 Mods ^= SISrcMods::NEG;
2222
2223 if ((ModsTmp & SISrcMods::ABS) != 0)
2224 Mods |= SISrcMods::ABS;
2225 }
2226
Matt Arsenaultd7e23032017-09-07 18:05:07 +00002227 // op_sel/op_sel_hi decide the source type and source.
2228 // If the source's op_sel_hi is set, it indicates to do a conversion from fp16.
2229 // If the sources's op_sel is set, it picks the high half of the source
2230 // register.
2231
2232 Mods |= SISrcMods::OP_SEL_1;
Matt Arsenault550c66d2017-10-13 20:45:49 +00002233 if (isExtractHiElt(Src, Src)) {
Matt Arsenaultd7e23032017-09-07 18:05:07 +00002234 Mods |= SISrcMods::OP_SEL_0;
2235
Matt Arsenault550c66d2017-10-13 20:45:49 +00002236 // TODO: Should we try to look for neg/abs here?
2237 }
2238
Matt Arsenaultd7e23032017-09-07 18:05:07 +00002239 return true;
2240 }
2241
2242 return false;
2243}
2244
Matt Arsenault76935122017-09-20 20:28:39 +00002245bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
2246 SDValue &SrcMods) const {
2247 unsigned Mods = 0;
2248 SelectVOP3PMadMixModsImpl(In, Src, Mods);
2249 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2250 return true;
2251}
2252
Matt Arsenaulte8c03a22019-03-08 20:58:11 +00002253SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const {
2254 if (In.isUndef())
2255 return CurDAG->getUNDEF(MVT::i32);
2256
2257 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
2258 SDLoc SL(In);
2259 return CurDAG->getConstant(C->getZExtValue() << 16, SL, MVT::i32);
2260 }
2261
2262 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
2263 SDLoc SL(In);
2264 return CurDAG->getConstant(
2265 C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
2266 }
2267
2268 SDValue Src;
2269 if (isExtractHiElt(In, Src))
2270 return Src;
2271
2272 return SDValue();
2273}
2274
Alexander Timofeevdb7ee762018-09-11 11:56:50 +00002275bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
2276 if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) {
2277 return false;
2278 }
2279 const SIRegisterInfo *SIRI =
2280 static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
2281 const SIInstrInfo * SII =
2282 static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
2283
2284 unsigned Limit = 0;
2285 bool AllUsesAcceptSReg = true;
2286 for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
2287 Limit < 10 && U != E; ++U, ++Limit) {
2288 const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
2289
2290 // If the register class is unknown, it could be an unknown
2291 // register class that needs to be an SGPR, e.g. an inline asm
2292 // constraint
2293 if (!RC || SIRI->isSGPRClass(RC))
2294 return false;
2295
2296 if (RC != &AMDGPU::VS_32RegClass) {
2297 AllUsesAcceptSReg = false;
2298 SDNode * User = *U;
2299 if (User->isMachineOpcode()) {
2300 unsigned Opc = User->getMachineOpcode();
2301 MCInstrDesc Desc = SII->get(Opc);
2302 if (Desc.isCommutable()) {
2303 unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
2304 unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
2305 if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) {
2306 unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
2307 const TargetRegisterClass *CommutedRC = getOperandRegClass(*U, CommutedOpNo);
2308 if (CommutedRC == &AMDGPU::VS_32RegClass)
2309 AllUsesAcceptSReg = true;
2310 }
2311 }
2312 }
2313 // If "AllUsesAcceptSReg == false" so far we haven't suceeded
2314 // commuting current user. This means have at least one use
2315 // that strictly require VGPR. Thus, we will not attempt to commute
2316 // other user instructions.
2317 if (!AllUsesAcceptSReg)
2318 break;
2319 }
2320 }
2321 return !AllUsesAcceptSReg && (Limit < 10);
2322}
2323
Alexander Timofeev4d302f62018-09-13 09:06:56 +00002324bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode * N) const {
2325 auto Ld = cast<LoadSDNode>(N);
2326
2327 return Ld->getAlignment() >= 4 &&
2328 (
2329 (
2330 (
2331 Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
2332 Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT
2333 )
2334 &&
2335 !N->isDivergent()
2336 )
2337 ||
2338 (
2339 Subtarget->getScalarizeGlobalBehavior() &&
2340 Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
2341 !Ld->isVolatile() &&
2342 !N->isDivergent() &&
2343 static_cast<const SITargetLowering *>(
2344 getTargetLowering())->isMemOpHasNoClobberedMemOperand(N)
2345 )
2346 );
2347}
Alexander Timofeevdb7ee762018-09-11 11:56:50 +00002348
Christian Konigd910b7d2013-02-26 17:52:16 +00002349void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
Bill Wendlinga3cd3502013-06-19 21:36:55 +00002350 const AMDGPUTargetLowering& Lowering =
Matt Arsenault209a7b92014-04-18 07:40:20 +00002351 *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002352 bool IsModified = false;
2353 do {
2354 IsModified = false;
Matt Arsenault68f05052017-12-04 22:18:27 +00002355
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002356 // Go over all selected nodes and try to fold them a bit more
Matt Arsenault68f05052017-12-04 22:18:27 +00002357 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
2358 while (Position != CurDAG->allnodes_end()) {
2359 SDNode *Node = &*Position++;
2360 MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002361 if (!MachineNode)
2362 continue;
Christian Konigd910b7d2013-02-26 17:52:16 +00002363
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002364 SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
Matt Arsenault68f05052017-12-04 22:18:27 +00002365 if (ResNode != Node) {
2366 if (ResNode)
2367 ReplaceUses(Node, ResNode);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002368 IsModified = true;
2369 }
Tom Stellard2183b702013-06-03 17:39:46 +00002370 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002371 CurDAG->RemoveDeadNodes();
2372 } while (IsModified);
Christian Konigd910b7d2013-02-26 17:52:16 +00002373}
Tom Stellard20287692017-08-08 04:57:55 +00002374
Tom Stellardc5a154d2018-06-28 23:47:12 +00002375bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
2376 Subtarget = &MF.getSubtarget<R600Subtarget>();
2377 return SelectionDAGISel::runOnMachineFunction(MF);
2378}
2379
2380bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
2381 if (!N->readMem())
2382 return false;
2383 if (CbId == -1)
Matt Arsenault0da63502018-08-31 05:49:54 +00002384 return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
2385 N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
Tom Stellardc5a154d2018-06-28 23:47:12 +00002386
Matt Arsenault0da63502018-08-31 05:49:54 +00002387 return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
Tom Stellardc5a154d2018-06-28 23:47:12 +00002388}
2389
2390bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
2391 SDValue& IntPtr) {
2392 if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
2393 IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
2394 true);
2395 return true;
2396 }
2397 return false;
2398}
2399
2400bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
2401 SDValue& BaseReg, SDValue &Offset) {
2402 if (!isa<ConstantSDNode>(Addr)) {
2403 BaseReg = Addr;
2404 Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
2405 return true;
2406 }
2407 return false;
2408}
2409
Tom Stellard20287692017-08-08 04:57:55 +00002410void R600DAGToDAGISel::Select(SDNode *N) {
2411 unsigned int Opc = N->getOpcode();
2412 if (N->isMachineOpcode()) {
2413 N->setNodeId(-1);
2414 return; // Already selected.
2415 }
2416
2417 switch (Opc) {
2418 default: break;
2419 case AMDGPUISD::BUILD_VERTICAL_VECTOR:
2420 case ISD::SCALAR_TO_VECTOR:
2421 case ISD::BUILD_VECTOR: {
2422 EVT VT = N->getValueType(0);
2423 unsigned NumVectorElts = VT.getVectorNumElements();
2424 unsigned RegClassID;
2425 // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
2426 // that adds a 128 bits reg copy when going through TwoAddressInstructions
2427 // pass. We want to avoid 128 bits copies as much as possible because they
2428 // can't be bundled by our scheduler.
2429 switch(NumVectorElts) {
Tom Stellardc5a154d2018-06-28 23:47:12 +00002430 case 2: RegClassID = R600::R600_Reg64RegClassID; break;
Tom Stellard20287692017-08-08 04:57:55 +00002431 case 4:
2432 if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
Tom Stellardc5a154d2018-06-28 23:47:12 +00002433 RegClassID = R600::R600_Reg128VerticalRegClassID;
Tom Stellard20287692017-08-08 04:57:55 +00002434 else
Tom Stellardc5a154d2018-06-28 23:47:12 +00002435 RegClassID = R600::R600_Reg128RegClassID;
Tom Stellard20287692017-08-08 04:57:55 +00002436 break;
2437 default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
2438 }
2439 SelectBuildVector(N, RegClassID);
2440 return;
2441 }
2442 }
2443
2444 SelectCode(N);
2445}
2446
2447bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
2448 SDValue &Offset) {
2449 ConstantSDNode *C;
2450 SDLoc DL(Addr);
2451
2452 if ((C = dyn_cast<ConstantSDNode>(Addr))) {
Tom Stellardc5a154d2018-06-28 23:47:12 +00002453 Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
Tom Stellard20287692017-08-08 04:57:55 +00002454 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2455 } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
2456 (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
Tom Stellardc5a154d2018-06-28 23:47:12 +00002457 Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
Tom Stellard20287692017-08-08 04:57:55 +00002458 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2459 } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
2460 (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
2461 Base = Addr.getOperand(0);
2462 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2463 } else {
2464 Base = Addr;
2465 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
2466 }
2467
2468 return true;
2469}
2470
2471bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
2472 SDValue &Offset) {
2473 ConstantSDNode *IMMOffset;
2474
2475 if (Addr.getOpcode() == ISD::ADD
2476 && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
2477 && isInt<16>(IMMOffset->getZExtValue())) {
2478
2479 Base = Addr.getOperand(0);
2480 Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
2481 MVT::i32);
2482 return true;
2483 // If the pointer address is constant, we can move it to the offset field.
2484 } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
2485 && isInt<16>(IMMOffset->getZExtValue())) {
2486 Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
2487 SDLoc(CurDAG->getEntryNode()),
Tom Stellardc5a154d2018-06-28 23:47:12 +00002488 R600::ZERO, MVT::i32);
Tom Stellard20287692017-08-08 04:57:55 +00002489 Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
2490 MVT::i32);
2491 return true;
2492 }
2493
2494 // Default case, no offset
2495 Base = Addr;
2496 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
2497 return true;
2498}