blob: 8f78edd76a51e168f55f1dc8ad98b3fdf85c089f [file] [log] [blame]
Tom Stellard75aadc22012-12-11 21:25:42 +00001//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Custom DAG lowering for R600
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000016#include "AMDGPUFrameLowering.h"
Matt Arsenaultc791f392014-06-23 18:00:31 +000017#include "AMDGPUIntrinsicInfo.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000018#include "AMDGPUSubtarget.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000019#include "R600Defines.h"
20#include "R600InstrInfo.h"
21#include "R600MachineFunctionInfo.h"
Tom Stellard067c8152014-07-21 14:01:14 +000022#include "llvm/Analysis/ValueTracking.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000023#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000024#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000025#include "llvm/CodeGen/MachineInstrBuilder.h"
26#include "llvm/CodeGen/MachineRegisterInfo.h"
27#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000028#include "llvm/IR/Argument.h"
29#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000030
31using namespace llvm;
32
Matt Arsenault43e92fe2016-06-24 06:30:11 +000033R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
34 const R600Subtarget &STI)
Eric Christopher7792e322015-01-30 23:24:40 +000035 : AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000036 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard75aadc22012-12-11 21:25:42 +000037 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000038 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
39 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
Matt Arsenault71e66762016-05-21 02:27:49 +000040 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
41 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000042
Eric Christopher23a3a7c2015-02-26 00:00:24 +000043 computeRegisterProperties(STI.getRegisterInfo());
Tom Stellard75aadc22012-12-11 21:25:42 +000044
Matt Arsenault71e66762016-05-21 02:27:49 +000045 // Legalize loads and stores to the private address space.
46 setOperationAction(ISD::LOAD, MVT::i32, Custom);
47 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
48 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
49
50 // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
51 // spaces, so it is custom lowered to handle those where it isn't.
52 for (MVT VT : MVT::integer_valuetypes()) {
53 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
54 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
55 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);
56
57 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
58 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
59 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);
60
61 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
62 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
63 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
64 }
65
Matt Arsenaultd1097a32016-06-02 19:54:26 +000066 // Workaround for LegalizeDAG asserting on expansion of i1 vector loads.
67 setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
68 setLoadExtAction(ISD::SEXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
69 setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
70
71 setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
72 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
73 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
74
75
Matt Arsenault71e66762016-05-21 02:27:49 +000076 setOperationAction(ISD::STORE, MVT::i8, Custom);
77 setOperationAction(ISD::STORE, MVT::i32, Custom);
78 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
79 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
80
81 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
82 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
83
Matt Arsenaultd1097a32016-06-02 19:54:26 +000084 // Workaround for LegalizeDAG asserting on expansion of i1 vector stores.
85 setTruncStoreAction(MVT::v2i32, MVT::v2i1, Expand);
86 setTruncStoreAction(MVT::v4i32, MVT::v4i1, Expand);
87
Tom Stellard0351ea22013-09-28 02:50:50 +000088 // Set condition code actions
89 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
90 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000091 setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000092 setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000093 setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
94 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000095 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
96 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
97 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
98 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000099 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
100 setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
101
102 setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
103 setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
104 setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
105 setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
106
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000107 setOperationAction(ISD::FCOS, MVT::f32, Custom);
108 setOperationAction(ISD::FSIN, MVT::f32, Custom);
109
Tom Stellard75aadc22012-12-11 21:25:42 +0000110 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000111 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +0000112
Tom Stellard492ebea2013-03-08 15:37:07 +0000113 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
114 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Matt Arsenault1d555c42014-06-23 18:00:55 +0000115 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000116
117 setOperationAction(ISD::FSUB, MVT::f32, Expand);
118
Tom Stellard75aadc22012-12-11 21:25:42 +0000119 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
120 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
121
Tom Stellarde8f9f282013-03-08 15:37:05 +0000122 setOperationAction(ISD::SETCC, MVT::i32, Expand);
123 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +0000124 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
Jan Vesely2cb62ce2014-07-10 22:40:21 +0000125 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
126 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000127
Tom Stellard53f2f902013-09-05 18:38:03 +0000128 setOperationAction(ISD::SELECT, MVT::i32, Expand);
129 setOperationAction(ISD::SELECT, MVT::f32, Expand);
130 setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
Tom Stellard53f2f902013-09-05 18:38:03 +0000131 setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +0000132
Jan Vesely808fff52015-04-30 17:15:56 +0000133 // ADD, SUB overflow.
134 // TODO: turn these into Legal?
135 if (Subtarget->hasCARRY())
136 setOperationAction(ISD::UADDO, MVT::i32, Custom);
137
138 if (Subtarget->hasBORROW())
139 setOperationAction(ISD::USUBO, MVT::i32, Custom);
140
Matt Arsenault4e466652014-04-16 01:41:30 +0000141 // Expand sign extension of vectors
142 if (!Subtarget->hasBFE())
143 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
144
145 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
146 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);
147
148 if (!Subtarget->hasBFE())
149 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
150 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
151 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);
152
153 if (!Subtarget->hasBFE())
154 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
155 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
156 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);
157
158 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
159 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
160 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);
161
162 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
163
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000164 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
165
Tom Stellard880a80a2014-06-17 16:53:14 +0000166 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
167 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
168 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
169 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
170
171 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
172 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
173 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
174 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
175
Jan Vesely25f36272014-06-18 12:27:13 +0000176 // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
177 // to be Legal/Custom in order to avoid library calls.
178 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
Jan Vesely900ff2e2014-06-18 12:27:15 +0000179 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
Jan Veselyecf51332014-06-18 12:27:17 +0000180 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
Jan Vesely25f36272014-06-18 12:27:13 +0000181
Michel Danzer49812b52013-07-10 16:37:07 +0000182 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
183
Matt Arsenaultc4d3d3a2014-06-23 18:00:49 +0000184 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
185 for (MVT VT : ScalarIntVTs) {
186 setOperationAction(ISD::ADDC, VT, Expand);
187 setOperationAction(ISD::SUBC, VT, Expand);
188 setOperationAction(ISD::ADDE, VT, Expand);
189 setOperationAction(ISD::SUBE, VT, Expand);
190 }
191
Tom Stellardfc455472013-08-12 22:33:21 +0000192 setSchedulingPreference(Sched::Source);
Matt Arsenault71e66762016-05-21 02:27:49 +0000193
194
195 setTargetDAGCombine(ISD::FP_ROUND);
196 setTargetDAGCombine(ISD::FP_TO_SINT);
197 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
198 setTargetDAGCombine(ISD::SELECT_CC);
199 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000200}
201
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000202const R600Subtarget *R600TargetLowering::getSubtarget() const {
203 return static_cast<const R600Subtarget *>(Subtarget);
204}
205
Tom Stellardc0f0fba2015-10-01 17:51:29 +0000206static inline bool isEOP(MachineBasicBlock::iterator I) {
207 return std::next(I)->getOpcode() == AMDGPU::RETURN;
208}
209
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000210MachineBasicBlock *
211R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
212 MachineBasicBlock *BB) const {
Tom Stellard75aadc22012-12-11 21:25:42 +0000213 MachineFunction * MF = BB->getParent();
214 MachineRegisterInfo &MRI = MF->getRegInfo();
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000215 MachineBasicBlock::iterator I = MI;
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000216 const R600InstrInfo *TII = getSubtarget()->getInstrInfo();
Tom Stellard75aadc22012-12-11 21:25:42 +0000217
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000218 switch (MI.getOpcode()) {
Tom Stellardc6f4a292013-08-26 15:05:59 +0000219 default:
Tom Stellard8f9fc202013-11-15 00:12:45 +0000220 // Replace LDS_*_RET instruction that don't have any uses with the
221 // equivalent LDS_*_NORET instruction.
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000222 if (TII->isLDSRetInstr(MI.getOpcode())) {
223 int DstIdx = TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst);
Tom Stellard13c68ef2013-09-05 18:38:09 +0000224 assert(DstIdx != -1);
225 MachineInstrBuilder NewMI;
Aaron Watry1885e532014-09-11 15:02:54 +0000226 // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
227 // LDS_1A2D support and remove this special case.
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000228 if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) ||
229 MI.getOpcode() == AMDGPU::LDS_CMPST_RET)
Tom Stellard8f9fc202013-11-15 00:12:45 +0000230 return BB;
231
232 NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000233 TII->get(AMDGPU::getLDSNoRetOp(MI.getOpcode())));
234 for (unsigned i = 1, e = MI.getNumOperands(); i < e; ++i) {
235 NewMI.addOperand(MI.getOperand(i));
Tom Stellardc6f4a292013-08-26 15:05:59 +0000236 }
Tom Stellardc6f4a292013-08-26 15:05:59 +0000237 } else {
238 return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
239 }
240 break;
Tom Stellard75aadc22012-12-11 21:25:42 +0000241 case AMDGPU::CLAMP_R600: {
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000242 MachineInstr *NewMI = TII->buildDefaultInstruction(
243 *BB, I, AMDGPU::MOV, MI.getOperand(0).getReg(),
244 MI.getOperand(1).getReg());
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +0000245 TII->addFlag(*NewMI, 0, MO_FLAG_CLAMP);
Tom Stellard75aadc22012-12-11 21:25:42 +0000246 break;
247 }
248
249 case AMDGPU::FABS_R600: {
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000250 MachineInstr *NewMI = TII->buildDefaultInstruction(
251 *BB, I, AMDGPU::MOV, MI.getOperand(0).getReg(),
252 MI.getOperand(1).getReg());
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +0000253 TII->addFlag(*NewMI, 0, MO_FLAG_ABS);
Tom Stellard75aadc22012-12-11 21:25:42 +0000254 break;
255 }
256
257 case AMDGPU::FNEG_R600: {
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000258 MachineInstr *NewMI = TII->buildDefaultInstruction(
259 *BB, I, AMDGPU::MOV, MI.getOperand(0).getReg(),
260 MI.getOperand(1).getReg());
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +0000261 TII->addFlag(*NewMI, 0, MO_FLAG_NEG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000262 break;
263 }
264
Tom Stellard75aadc22012-12-11 21:25:42 +0000265 case AMDGPU::MASK_WRITE: {
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000266 unsigned maskedRegister = MI.getOperand(0).getReg();
Tom Stellard75aadc22012-12-11 21:25:42 +0000267 assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
268 MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +0000269 TII->addFlag(*defInstr, 0, MO_FLAG_MASK);
Tom Stellard75aadc22012-12-11 21:25:42 +0000270 break;
271 }
272
273 case AMDGPU::MOV_IMM_F32:
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000274 TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1)
275 .getFPImm()
276 ->getValueAPF()
277 .bitcastToAPInt()
278 .getZExtValue());
Tom Stellard75aadc22012-12-11 21:25:42 +0000279 break;
280 case AMDGPU::MOV_IMM_I32:
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000281 TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(),
282 MI.getOperand(1).getImm());
Tom Stellard75aadc22012-12-11 21:25:42 +0000283 break;
Jan Veselyf97de002016-05-13 20:39:29 +0000284 case AMDGPU::MOV_IMM_GLOBAL_ADDR: {
285 //TODO: Perhaps combine this instruction with the next if possible
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000286 auto MIB = TII->buildDefaultInstruction(
287 *BB, MI, AMDGPU::MOV, MI.getOperand(0).getReg(), AMDGPU::ALU_LITERAL_X);
Jan Veselyf97de002016-05-13 20:39:29 +0000288 int Idx = TII->getOperandIdx(*MIB, AMDGPU::OpName::literal);
289 //TODO: Ugh this is rather ugly
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000290 MIB->getOperand(Idx) = MI.getOperand(1);
Jan Veselyf97de002016-05-13 20:39:29 +0000291 break;
292 }
Vincent Lejeune0b72f102013-03-05 15:04:55 +0000293 case AMDGPU::CONST_COPY: {
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000294 MachineInstr *NewMI = TII->buildDefaultInstruction(
295 *BB, MI, AMDGPU::MOV, MI.getOperand(0).getReg(), AMDGPU::ALU_CONST);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +0000296 TII->setImmOperand(*NewMI, AMDGPU::OpName::src0_sel,
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000297 MI.getOperand(1).getImm());
Vincent Lejeune0b72f102013-03-05 15:04:55 +0000298 break;
299 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000300
301 case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
Tom Stellard0344cdf2013-08-01 15:23:42 +0000302 case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
Tom Stellard75aadc22012-12-11 21:25:42 +0000303 case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000304 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
305 .addOperand(MI.getOperand(0))
306 .addOperand(MI.getOperand(1))
307 .addImm(isEOP(I)); // Set End of program bit
Tom Stellard75aadc22012-12-11 21:25:42 +0000308 break;
309 }
Tom Stellarde0e582c2015-10-01 17:51:34 +0000310 case AMDGPU::RAT_STORE_TYPED_eg: {
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000311 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
312 .addOperand(MI.getOperand(0))
313 .addOperand(MI.getOperand(1))
314 .addOperand(MI.getOperand(2))
315 .addImm(isEOP(I)); // Set End of program bit
Tom Stellarde0e582c2015-10-01 17:51:34 +0000316 break;
317 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000318
Tom Stellard75aadc22012-12-11 21:25:42 +0000319 case AMDGPU::TXD: {
320 unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
321 unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000322 MachineOperand &RID = MI.getOperand(4);
323 MachineOperand &SID = MI.getOperand(5);
324 unsigned TextureId = MI.getOperand(6).getImm();
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000325 unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
326 unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
Tom Stellard75aadc22012-12-11 21:25:42 +0000327
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000328 switch (TextureId) {
329 case 5: // Rect
330 CTX = CTY = 0;
331 break;
332 case 6: // Shadow1D
333 SrcW = SrcZ;
334 break;
335 case 7: // Shadow2D
336 SrcW = SrcZ;
337 break;
338 case 8: // ShadowRect
339 CTX = CTY = 0;
340 SrcW = SrcZ;
341 break;
342 case 9: // 1DArray
343 SrcZ = SrcY;
344 CTZ = 0;
345 break;
346 case 10: // 2DArray
347 CTZ = 0;
348 break;
349 case 11: // Shadow1DArray
350 SrcZ = SrcY;
351 CTZ = 0;
352 break;
353 case 12: // Shadow2DArray
354 CTZ = 0;
355 break;
356 }
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000357 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H),
358 T0)
359 .addOperand(MI.getOperand(3))
360 .addImm(SrcX)
361 .addImm(SrcY)
362 .addImm(SrcZ)
363 .addImm(SrcW)
364 .addImm(0)
365 .addImm(0)
366 .addImm(0)
367 .addImm(0)
368 .addImm(1)
369 .addImm(2)
370 .addImm(3)
371 .addOperand(RID)
372 .addOperand(SID)
373 .addImm(CTX)
374 .addImm(CTY)
375 .addImm(CTZ)
376 .addImm(CTW);
377 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V),
378 T1)
379 .addOperand(MI.getOperand(2))
380 .addImm(SrcX)
381 .addImm(SrcY)
382 .addImm(SrcZ)
383 .addImm(SrcW)
384 .addImm(0)
385 .addImm(0)
386 .addImm(0)
387 .addImm(0)
388 .addImm(1)
389 .addImm(2)
390 .addImm(3)
391 .addOperand(RID)
392 .addOperand(SID)
393 .addImm(CTX)
394 .addImm(CTY)
395 .addImm(CTZ)
396 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000397 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000398 .addOperand(MI.getOperand(0))
399 .addOperand(MI.getOperand(1))
400 .addImm(SrcX)
401 .addImm(SrcY)
402 .addImm(SrcZ)
403 .addImm(SrcW)
404 .addImm(0)
405 .addImm(0)
406 .addImm(0)
407 .addImm(0)
408 .addImm(1)
409 .addImm(2)
410 .addImm(3)
411 .addOperand(RID)
412 .addOperand(SID)
413 .addImm(CTX)
414 .addImm(CTY)
415 .addImm(CTZ)
416 .addImm(CTW)
417 .addReg(T0, RegState::Implicit)
418 .addReg(T1, RegState::Implicit);
Tom Stellard75aadc22012-12-11 21:25:42 +0000419 break;
420 }
421
422 case AMDGPU::TXD_SHADOW: {
423 unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
424 unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000425 MachineOperand &RID = MI.getOperand(4);
426 MachineOperand &SID = MI.getOperand(5);
427 unsigned TextureId = MI.getOperand(6).getImm();
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000428 unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
429 unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
430
431 switch (TextureId) {
432 case 5: // Rect
433 CTX = CTY = 0;
434 break;
435 case 6: // Shadow1D
436 SrcW = SrcZ;
437 break;
438 case 7: // Shadow2D
439 SrcW = SrcZ;
440 break;
441 case 8: // ShadowRect
442 CTX = CTY = 0;
443 SrcW = SrcZ;
444 break;
445 case 9: // 1DArray
446 SrcZ = SrcY;
447 CTZ = 0;
448 break;
449 case 10: // 2DArray
450 CTZ = 0;
451 break;
452 case 11: // Shadow1DArray
453 SrcZ = SrcY;
454 CTZ = 0;
455 break;
456 case 12: // Shadow2DArray
457 CTZ = 0;
458 break;
459 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000460
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000461 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H),
462 T0)
463 .addOperand(MI.getOperand(3))
464 .addImm(SrcX)
465 .addImm(SrcY)
466 .addImm(SrcZ)
467 .addImm(SrcW)
468 .addImm(0)
469 .addImm(0)
470 .addImm(0)
471 .addImm(0)
472 .addImm(1)
473 .addImm(2)
474 .addImm(3)
475 .addOperand(RID)
476 .addOperand(SID)
477 .addImm(CTX)
478 .addImm(CTY)
479 .addImm(CTZ)
480 .addImm(CTW);
481 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V),
482 T1)
483 .addOperand(MI.getOperand(2))
484 .addImm(SrcX)
485 .addImm(SrcY)
486 .addImm(SrcZ)
487 .addImm(SrcW)
488 .addImm(0)
489 .addImm(0)
490 .addImm(0)
491 .addImm(0)
492 .addImm(1)
493 .addImm(2)
494 .addImm(3)
495 .addOperand(RID)
496 .addOperand(SID)
497 .addImm(CTX)
498 .addImm(CTY)
499 .addImm(CTZ)
500 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000501 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000502 .addOperand(MI.getOperand(0))
503 .addOperand(MI.getOperand(1))
504 .addImm(SrcX)
505 .addImm(SrcY)
506 .addImm(SrcZ)
507 .addImm(SrcW)
508 .addImm(0)
509 .addImm(0)
510 .addImm(0)
511 .addImm(0)
512 .addImm(1)
513 .addImm(2)
514 .addImm(3)
515 .addOperand(RID)
516 .addOperand(SID)
517 .addImm(CTX)
518 .addImm(CTY)
519 .addImm(CTZ)
520 .addImm(CTW)
521 .addReg(T0, RegState::Implicit)
522 .addReg(T1, RegState::Implicit);
Tom Stellard75aadc22012-12-11 21:25:42 +0000523 break;
524 }
525
526 case AMDGPU::BRANCH:
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000527 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
528 .addOperand(MI.getOperand(0));
529 break;
Tom Stellard75aadc22012-12-11 21:25:42 +0000530
531 case AMDGPU::BRANCH_COND_f32: {
532 MachineInstr *NewMI =
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000533 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
534 AMDGPU::PREDICATE_BIT)
535 .addOperand(MI.getOperand(1))
536 .addImm(OPCODE_IS_NOT_ZERO)
537 .addImm(0); // Flags
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +0000538 TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000539 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000540 .addOperand(MI.getOperand(0))
541 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
Tom Stellard75aadc22012-12-11 21:25:42 +0000542 break;
543 }
544
545 case AMDGPU::BRANCH_COND_i32: {
546 MachineInstr *NewMI =
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000547 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
548 AMDGPU::PREDICATE_BIT)
549 .addOperand(MI.getOperand(1))
Tom Stellard75aadc22012-12-11 21:25:42 +0000550 .addImm(OPCODE_IS_NOT_ZERO_INT)
551 .addImm(0); // Flags
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +0000552 TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000553 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000554 .addOperand(MI.getOperand(0))
555 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
Tom Stellard75aadc22012-12-11 21:25:42 +0000556 break;
557 }
558
Tom Stellard75aadc22012-12-11 21:25:42 +0000559 case AMDGPU::EG_ExportSwz:
560 case AMDGPU::R600_ExportSwz: {
Tom Stellard6f1b8652013-01-23 21:39:49 +0000561 // Instruction is left unmodified if its not the last one of its type
562 bool isLastInstructionOfItsType = true;
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000563 unsigned InstExportType = MI.getOperand(1).getImm();
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +0000564 for (MachineBasicBlock::iterator NextExportInst = std::next(I),
Tom Stellard6f1b8652013-01-23 21:39:49 +0000565 EndBlock = BB->end(); NextExportInst != EndBlock;
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +0000566 NextExportInst = std::next(NextExportInst)) {
Tom Stellard6f1b8652013-01-23 21:39:49 +0000567 if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
568 NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
569 unsigned CurrentInstExportType = NextExportInst->getOperand(1)
570 .getImm();
571 if (CurrentInstExportType == InstExportType) {
572 isLastInstructionOfItsType = false;
573 break;
574 }
575 }
576 }
Tom Stellardc0f0fba2015-10-01 17:51:29 +0000577 bool EOP = isEOP(I);
Tom Stellard6f1b8652013-01-23 21:39:49 +0000578 if (!EOP && !isLastInstructionOfItsType)
Tom Stellard75aadc22012-12-11 21:25:42 +0000579 return BB;
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000580 unsigned CfInst = (MI.getOpcode() == AMDGPU::EG_ExportSwz) ? 84 : 40;
581 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
582 .addOperand(MI.getOperand(0))
583 .addOperand(MI.getOperand(1))
584 .addOperand(MI.getOperand(2))
585 .addOperand(MI.getOperand(3))
586 .addOperand(MI.getOperand(4))
587 .addOperand(MI.getOperand(5))
588 .addOperand(MI.getOperand(6))
589 .addImm(CfInst)
590 .addImm(EOP);
Tom Stellard75aadc22012-12-11 21:25:42 +0000591 break;
592 }
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000593 case AMDGPU::RETURN: {
594 // RETURN instructions must have the live-out registers as implicit uses,
595 // otherwise they appear dead.
596 R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
597 MachineInstrBuilder MIB(*MF, MI);
598 for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
599 MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
600 return BB;
601 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000602 }
603
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000604 MI.eraseFromParent();
Tom Stellard75aadc22012-12-11 21:25:42 +0000605 return BB;
606}
607
608//===----------------------------------------------------------------------===//
609// Custom DAG Lowering Operations
610//===----------------------------------------------------------------------===//
611
Tom Stellard75aadc22012-12-11 21:25:42 +0000612SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000613 MachineFunction &MF = DAG.getMachineFunction();
614 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard75aadc22012-12-11 21:25:42 +0000615 switch (Op.getOpcode()) {
616 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Tom Stellard880a80a2014-06-17 16:53:14 +0000617 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
618 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
Jan Vesely25f36272014-06-18 12:27:13 +0000619 case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
Jan Veselyecf51332014-06-18 12:27:17 +0000620 case ISD::SRA_PARTS:
Jan Vesely900ff2e2014-06-18 12:27:15 +0000621 case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
Jan Vesely808fff52015-04-30 17:15:56 +0000622 case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
623 case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000624 case ISD::FCOS:
625 case ISD::FSIN: return LowerTrig(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000626 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000627 case ISD::STORE: return LowerSTORE(Op, DAG);
Matt Arsenaultd2c9e082014-07-07 18:34:45 +0000628 case ISD::LOAD: {
629 SDValue Result = LowerLOAD(Op, DAG);
630 assert((!Result.getNode() ||
631 Result.getNode()->getNumValues() == 2) &&
632 "Load should return a value and a chain");
633 return Result;
634 }
635
Matt Arsenault1d555c42014-06-23 18:00:55 +0000636 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000637 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
Matt Arsenault81d06012016-03-07 21:10:13 +0000638 case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000639 case ISD::INTRINSIC_VOID: {
640 SDValue Chain = Op.getOperand(0);
641 unsigned IntrinsicID =
642 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
643 switch (IntrinsicID) {
Matt Arsenault82e5e1e2016-07-15 21:27:08 +0000644 case AMDGPUIntrinsic::r600_store_swizzle: {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000645 SDLoc DL(Op);
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000646 const SDValue Args[8] = {
647 Chain,
648 Op.getOperand(2), // Export Value
649 Op.getOperand(3), // ArrayBase
650 Op.getOperand(4), // Type
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000651 DAG.getConstant(0, DL, MVT::i32), // SWZ_X
652 DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
653 DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
654 DAG.getConstant(3, DL, MVT::i32) // SWZ_W
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000655 };
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000656 return DAG.getNode(AMDGPUISD::EXPORT, DL, Op.getValueType(), Args);
Tom Stellard75aadc22012-12-11 21:25:42 +0000657 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000658
Tom Stellard75aadc22012-12-11 21:25:42 +0000659 // default for switch(IntrinsicID)
660 default: break;
661 }
662 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
663 break;
664 }
665 case ISD::INTRINSIC_WO_CHAIN: {
666 unsigned IntrinsicID =
667 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
668 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000669 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000670 switch(IntrinsicID) {
671 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Matt Arsenault59bd3012016-01-22 19:00:09 +0000672 case AMDGPUIntrinsic::r600_tex:
673 case AMDGPUIntrinsic::r600_texc:
674 case AMDGPUIntrinsic::r600_txl:
675 case AMDGPUIntrinsic::r600_txlc:
676 case AMDGPUIntrinsic::r600_txb:
677 case AMDGPUIntrinsic::r600_txbc:
678 case AMDGPUIntrinsic::r600_txf:
679 case AMDGPUIntrinsic::r600_txq:
680 case AMDGPUIntrinsic::r600_ddx:
Matt Arsenault648e4222016-07-14 05:23:23 +0000681 case AMDGPUIntrinsic::r600_ddy: {
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000682 unsigned TextureOp;
683 switch (IntrinsicID) {
Matt Arsenault59bd3012016-01-22 19:00:09 +0000684 case AMDGPUIntrinsic::r600_tex:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000685 TextureOp = 0;
686 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000687 case AMDGPUIntrinsic::r600_texc:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000688 TextureOp = 1;
689 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000690 case AMDGPUIntrinsic::r600_txl:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000691 TextureOp = 2;
692 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000693 case AMDGPUIntrinsic::r600_txlc:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000694 TextureOp = 3;
695 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000696 case AMDGPUIntrinsic::r600_txb:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000697 TextureOp = 4;
698 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000699 case AMDGPUIntrinsic::r600_txbc:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000700 TextureOp = 5;
701 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000702 case AMDGPUIntrinsic::r600_txf:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000703 TextureOp = 6;
704 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000705 case AMDGPUIntrinsic::r600_txq:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000706 TextureOp = 7;
707 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000708 case AMDGPUIntrinsic::r600_ddx:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000709 TextureOp = 8;
710 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000711 case AMDGPUIntrinsic::r600_ddy:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000712 TextureOp = 9;
713 break;
714 default:
715 llvm_unreachable("Unknow Texture Operation");
716 }
717
718 SDValue TexArgs[19] = {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000719 DAG.getConstant(TextureOp, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000720 Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000721 DAG.getConstant(0, DL, MVT::i32),
722 DAG.getConstant(1, DL, MVT::i32),
723 DAG.getConstant(2, DL, MVT::i32),
724 DAG.getConstant(3, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000725 Op.getOperand(2),
726 Op.getOperand(3),
727 Op.getOperand(4),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000728 DAG.getConstant(0, DL, MVT::i32),
729 DAG.getConstant(1, DL, MVT::i32),
730 DAG.getConstant(2, DL, MVT::i32),
731 DAG.getConstant(3, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000732 Op.getOperand(5),
733 Op.getOperand(6),
734 Op.getOperand(7),
735 Op.getOperand(8),
736 Op.getOperand(9),
737 Op.getOperand(10)
738 };
Craig Topper48d114b2014-04-26 18:35:24 +0000739 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000740 }
Matt Arsenaultca7f5702016-07-14 05:47:17 +0000741 case AMDGPUIntrinsic::r600_dot4: {
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000742 SDValue Args[8] = {
743 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000744 DAG.getConstant(0, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000745 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000746 DAG.getConstant(0, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000747 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000748 DAG.getConstant(1, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000749 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000750 DAG.getConstant(1, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000751 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000752 DAG.getConstant(2, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000753 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000754 DAG.getConstant(2, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000755 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000756 DAG.getConstant(3, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000757 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000758 DAG.getConstant(3, DL, MVT::i32))
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000759 };
Craig Topper48d114b2014-04-26 18:35:24 +0000760 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000761 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000762
Jan Vesely2fa28c32016-07-10 21:20:29 +0000763 case Intrinsic::r600_implicitarg_ptr: {
764 MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUAS::PARAM_I_ADDRESS);
765 uint32_t ByteOffset = getImplicitParameterOffset(MFI, FIRST_IMPLICIT);
766 return DAG.getConstant(ByteOffset, DL, PtrVT);
767 }
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000768 case Intrinsic::r600_read_ngroups_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000769 return LowerImplicitParameter(DAG, VT, DL, 0);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000770 case Intrinsic::r600_read_ngroups_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000771 return LowerImplicitParameter(DAG, VT, DL, 1);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000772 case Intrinsic::r600_read_ngroups_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000773 return LowerImplicitParameter(DAG, VT, DL, 2);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000774 case Intrinsic::r600_read_global_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000775 return LowerImplicitParameter(DAG, VT, DL, 3);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000776 case Intrinsic::r600_read_global_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000777 return LowerImplicitParameter(DAG, VT, DL, 4);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000778 case Intrinsic::r600_read_global_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000779 return LowerImplicitParameter(DAG, VT, DL, 5);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000780 case Intrinsic::r600_read_local_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000781 return LowerImplicitParameter(DAG, VT, DL, 6);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000782 case Intrinsic::r600_read_local_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000783 return LowerImplicitParameter(DAG, VT, DL, 7);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000784 case Intrinsic::r600_read_local_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000785 return LowerImplicitParameter(DAG, VT, DL, 8);
786
Matt Arsenaultbef34e22016-01-22 21:30:34 +0000787 case Intrinsic::r600_read_workdim:
788 case AMDGPUIntrinsic::AMDGPU_read_workdim: { // Legacy name.
Tom Stellarddcb9f092015-07-09 21:20:37 +0000789 uint32_t ByteOffset = getImplicitParameterOffset(MFI, GRID_DIM);
790 return LowerImplicitParameter(DAG, VT, DL, ByteOffset / 4);
791 }
Jan Veselye5121f32014-10-14 20:05:26 +0000792
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000793 case Intrinsic::r600_read_tgid_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000794 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
795 AMDGPU::T1_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000796 case Intrinsic::r600_read_tgid_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000797 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
798 AMDGPU::T1_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000799 case Intrinsic::r600_read_tgid_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000800 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
801 AMDGPU::T1_Z, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000802 case Intrinsic::r600_read_tidig_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000803 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
804 AMDGPU::T0_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000805 case Intrinsic::r600_read_tidig_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000806 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
807 AMDGPU::T0_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000808 case Intrinsic::r600_read_tidig_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000809 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
810 AMDGPU::T0_Z, VT);
Matt Arsenaultbef34e22016-01-22 21:30:34 +0000811
Matt Arsenault09b2c4a2016-07-15 21:26:52 +0000812 case Intrinsic::r600_recipsqrt_ieee:
813 return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
Matt Arsenaultbef34e22016-01-22 21:30:34 +0000814
Matt Arsenault09b2c4a2016-07-15 21:26:52 +0000815 case Intrinsic::r600_recipsqrt_clamped:
816 return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000817 }
Matt Arsenault09b2c4a2016-07-15 21:26:52 +0000818
Tom Stellard75aadc22012-12-11 21:25:42 +0000819 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
820 break;
821 }
822 } // end switch(Op.getOpcode())
823 return SDValue();
824}
825
/// Replace the results of an illegal-typed node \p N with legal equivalents,
/// appending the replacement values (in result order) to \p Results.
/// Falls back to the generic AMDGPU handling for opcodes not special-cased
/// here.
void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
    return;
  case ISD::FP_TO_UINT:
    // fp -> i1 is lowered as a compare-against-zero; see LowerFPTOUINT.
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
      return;
    }
    // Fall-through. Since we don't care about out of bounds values
    // we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint
    // considers some extra cases which are not necessary here.
  case ISD::FP_TO_SINT: {
    SDValue Result;
    if (expandFP_TO_SINT(N, Result, DAG))
      Results.push_back(Result);
    return;
  }
  case ISD::SDIVREM: {
    // LowerSDIVREM returns a two-result node; push both quotient and
    // remainder.
    SDValue Op = SDValue(N, 1);
    SDValue RES = LowerSDIVREM(Op, DAG);
    Results.push_back(RES);
    Results.push_back(RES.getValue(1));
    break;
  }
  case ISD::UDIVREM: {
    // LowerUDIVREM64 appends its replacement values to Results itself.
    SDValue Op = SDValue(N, 0);
    LowerUDIVREM64(Op, DAG, Results);
    break;
  }
  }
}
861
Tom Stellard880a80a2014-06-17 16:53:14 +0000862SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
863 SDValue Vector) const {
864
865 SDLoc DL(Vector);
866 EVT VecVT = Vector.getValueType();
867 EVT EltVT = VecVT.getVectorElementType();
868 SmallVector<SDValue, 8> Args;
869
870 for (unsigned i = 0, e = VecVT.getVectorNumElements();
871 i != e; ++i) {
Mehdi Amini44ede332015-07-09 02:09:04 +0000872 Args.push_back(DAG.getNode(
873 ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
874 DAG.getConstant(i, DL, getVectorIdxTy(DAG.getDataLayout()))));
Tom Stellard880a80a2014-06-17 16:53:14 +0000875 }
876
877 return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
878}
879
880SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
881 SelectionDAG &DAG) const {
882
883 SDLoc DL(Op);
884 SDValue Vector = Op.getOperand(0);
885 SDValue Index = Op.getOperand(1);
886
887 if (isa<ConstantSDNode>(Index) ||
888 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
889 return Op;
890
891 Vector = vectorToVerticalVector(DAG, Vector);
892 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
893 Vector, Index);
894}
895
896SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
897 SelectionDAG &DAG) const {
898 SDLoc DL(Op);
899 SDValue Vector = Op.getOperand(0);
900 SDValue Value = Op.getOperand(1);
901 SDValue Index = Op.getOperand(2);
902
903 if (isa<ConstantSDNode>(Index) ||
904 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
905 return Op;
906
907 Vector = vectorToVerticalVector(DAG, Vector);
908 SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
909 Vector, Value, Index);
910 return vectorToVerticalVector(DAG, Insert);
911}
912
Tom Stellard27233b72016-05-02 18:05:17 +0000913SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
914 SDValue Op,
915 SelectionDAG &DAG) const {
916
917 GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
918 if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
919 return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
920
921 const DataLayout &DL = DAG.getDataLayout();
922 const GlobalValue *GV = GSD->getGlobal();
Tom Stellard27233b72016-05-02 18:05:17 +0000923 MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
924
Jan Veselyf97de002016-05-13 20:39:29 +0000925 SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
926 return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
Tom Stellard27233b72016-05-02 18:05:17 +0000927}
928
/// Lower FSIN/FCOS to the hardware SIN_HW/COS_HW nodes, pre-scaling the
/// argument into the range the hardware accepts.
SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
  // On hw >= R700, COS/SIN input must be between -1. and 1.
  // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
  EVT VT = Op.getValueType();
  SDValue Arg = Op.getOperand(0);
  SDLoc DL(Op);

  // TODO: Should this propagate fast-math-flags?
  // 0.15915494309 ~= 1/(2*Pi): FractPart = FRACT(Arg/(2*Pi) + 0.5), i.e. the
  // argument reduced to one period and shifted into [0, 1).
  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT,
        DAG.getNode(ISD::FMUL, DL, VT, Arg,
          DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
        DAG.getConstantFP(0.5, DL, MVT::f32)));
  unsigned TrigNode;
  switch (Op.getOpcode()) {
  case ISD::FCOS:
    TrigNode = AMDGPUISD::COS_HW;
    break;
  case ISD::FSIN:
    TrigNode = AMDGPUISD::SIN_HW;
    break;
  default:
    llvm_unreachable("Wrong trig opcode");
  }
  // Re-center the reduced argument from [0, 1) to [-0.5, 0.5) before feeding
  // the hardware node.
  SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT, FractPart,
        DAG.getConstantFP(-0.5, DL, MVT::f32)));
  if (Gen >= R600Subtarget::R700)
    return TrigVal;
  // On R600 hw, COS/SIN input must be between -Pi and Pi.
  // Scale the [-0.5, 0.5) argument back up to [-Pi, Pi).
  return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
      DAG.getConstantFP(3.14159265359, DL, MVT::f32));
}
962
/// Lower SHL_PARTS: a double-width left shift of the (Lo, Hi) pair by Shift,
/// expressed with single-width operations. Returns the merged (Lo, Hi) pair.
SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One  = DAG.getConstant(1, DL, VT);

  SDValue Width  = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  // BigShift is the effective amount when Shift >= Width (Hi takes Lo's
  // shifted-out bits and Lo becomes zero).
  SDValue BigShift  = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps, the alternative is to
  // add a conditional to filter the special case.

  // Overflow = bits of Lo that cross into Hi: Lo >> (Width - Shift), done as
  // (Lo >> (Width1 - Shift)) >> 1 to keep the shift amount in range.
  SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
  Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);

  // Results for Shift < Width.
  SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
  HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
  SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);

  // Results for Shift >= Width.
  SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
  SDValue LoBig = Zero;

  // Select between the two cases based on the actual shift amount.
  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
}
998
/// Lower SRA_PARTS/SRL_PARTS: a double-width right shift of the (Lo, Hi)
/// pair by Shift, expressed with single-width operations. SRA_PARTS sign
/// extends from Hi; SRL_PARTS zero fills. Returns the merged (Lo, Hi) pair.
SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One  = DAG.getConstant(1, DL, VT);

  // Arithmetic (sign-filling) variant vs. logical (zero-filling) variant.
  const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;

  SDValue Width  = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  // BigShift is the effective amount when Shift >= Width.
  SDValue BigShift  = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps, the alternative is to
  // add a conditional to filter the special case.

  // Overflow = bits of Hi that cross into Lo: Hi << (Width - Shift), done as
  // (Hi << (Width1 - Shift)) << 1 to keep the shift amount in range.
  SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
  Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);

  // Results for Shift < Width.
  SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
  SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
  LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);

  // Results for Shift >= Width; SRA fills Hi with the sign bit (Hi >> Width1).
  SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
  SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;

  // Select between the two cases based on the actual shift amount.
  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
}
1036
Jan Vesely808fff52015-04-30 17:15:56 +00001037SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
1038 unsigned mainop, unsigned ovf) const {
1039 SDLoc DL(Op);
1040 EVT VT = Op.getValueType();
1041
1042 SDValue Lo = Op.getOperand(0);
1043 SDValue Hi = Op.getOperand(1);
1044
1045 SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
1046 // Extend sign.
1047 OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
1048 DAG.getValueType(MVT::i1));
1049
1050 SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);
1051
1052 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
1053}
1054
Tom Stellard75aadc22012-12-11 21:25:42 +00001055SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001056 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001057 return DAG.getNode(
1058 ISD::SETCC,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001059 DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001060 MVT::i1,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001061 Op, DAG.getConstantFP(0.0f, DL, MVT::f32),
Tom Stellard75aadc22012-12-11 21:25:42 +00001062 DAG.getCondCode(ISD::SETNE)
1063 );
1064}
1065
Tom Stellard75aadc22012-12-11 21:25:42 +00001066SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Benjamin Kramerbdc49562016-06-12 15:39:02 +00001067 const SDLoc &DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001068 unsigned DwordOffset) const {
1069 unsigned ByteOffset = DwordOffset * 4;
1070 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +00001071 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +00001072
1073 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
1074 assert(isInt<16>(ByteOffset));
1075
1076 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001077 DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
Justin Lebar9c375812016-07-15 18:27:10 +00001078 MachinePointerInfo(ConstantPointerNull::get(PtrType)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001079}
1080
Tom Stellard75aadc22012-12-11 21:25:42 +00001081bool R600TargetLowering::isZero(SDValue Op) const {
1082 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
1083 return Cst->isNullValue();
1084 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
1085 return CstFP->isZero();
1086 } else {
1087 return false;
1088 }
1089}
1090
Matt Arsenault6b6a2c32016-03-11 08:00:27 +00001091bool R600TargetLowering::isHWTrueValue(SDValue Op) const {
1092 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
1093 return CFP->isExactlyValue(1.0);
1094 }
1095 return isAllOnesConstant(Op);
1096}
1097
1098bool R600TargetLowering::isHWFalseValue(SDValue Op) const {
1099 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
1100 return CFP->getValueAPF().isZero();
1101 }
1102 return isNullConstant(Op);
1103}
1104
/// Lower SELECT_CC, trying to match the native SET* and CND* instruction
/// patterns by canonicalizing the operands/condition before falling back to
/// a two-step SELECT_CC expansion.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // f32 selects may fold into legacy min/max nodes.
  if (VT == MVT::f32) {
    DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
    SDValue MinMax = CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
    if (MinMax)
      return MinMax;
  }

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
     ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    // True/False are swapped relative to the SET* pattern; invert the
    // condition (or invert-and-swap the compare operands) to compensate.
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    // CND* has no "not equal" form; fold the negation into a True/False
    // swap with the inverted condition.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, DL, CompareVT);
    HWFalse = DAG.getConstant(0, DL, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
1246
Alp Tokercb402912014-01-24 17:20:08 +00001247/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001248/// convert these pointers to a register index. Each register holds
1249/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
1250/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
1251/// for indirect addressing.
1252SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1253 unsigned StackWidth,
1254 SelectionDAG &DAG) const {
1255 unsigned SRLPad;
1256 switch(StackWidth) {
1257 case 1:
1258 SRLPad = 2;
1259 break;
1260 case 2:
1261 SRLPad = 3;
1262 break;
1263 case 4:
1264 SRLPad = 4;
1265 break;
1266 default: llvm_unreachable("Invalid stack width");
1267 }
1268
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001269 SDLoc DL(Ptr);
1270 return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
1271 DAG.getConstant(SRLPad, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001272}
1273
/// Map the element index \p ElemIdx of a stack access onto a register
/// channel (x/y/z/w sub-register) and a pointer increment, given how many
/// channels per register (\p StackWidth) are used for indirect addressing.
void R600TargetLowering::getStackAddress(unsigned StackWidth,
                                         unsigned ElemIdx,
                                         unsigned &Channel,
                                         unsigned &PtrIncr) const {
  switch (StackWidth) {
  default:
  case 1:
    // One channel per register: every element after the first moves to the
    // next register.
    Channel = 0;
    if (ElemIdx > 0) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 2:
    // Two channels per register: even elements in channel 0, odd in 1.
    Channel = ElemIdx % 2;
    // NOTE(review): only ElemIdx == 2 bumps the pointer here (ElemIdx == 3
    // does not) — presumably PtrIncr is applied cumulatively by the caller
    // as it walks elements in order; verify against the call sites.
    if (ElemIdx == 2) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 4:
    // Four channels per register: all elements fit in one register.
    Channel = ElemIdx;
    PtrIncr = 0;
    break;
  }
}
1302
/// Lower a truncating (i8/i16) store to private memory as a
/// read-modify-write of the containing 32-bit register word, since private
/// memory is only dword-addressable here.
SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Store);

  // Mask covering just the stored sub-word.
  unsigned Mask = 0;
  if (Store->getMemoryVT() == MVT::i8) {
    Mask = 0xff;
  } else if (Store->getMemoryVT() == MVT::i16) {
    Mask = 0xffff;
  }

  SDValue Chain = Store->getChain();
  SDValue BasePtr = Store->getBasePtr();
  EVT MemVT = Store->getMemoryVT();

  // Dword address = byte address / 4; load the existing 32-bit word.
  SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, BasePtr,
                            DAG.getConstant(2, DL, MVT::i32));
  SDValue Dst = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
                            Chain, Ptr,
                            DAG.getTargetConstant(0, DL, MVT::i32));

  // Byte position within the dword, converted to a bit shift (x8).
  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, BasePtr,
                                DAG.getConstant(0x3, DL, MVT::i32));

  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
                                 DAG.getConstant(3, DL, MVT::i32));

  // Widen the stored value to i32, keep only the low MemVT bits, and move
  // them into position within the dword.
  SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
                                  Store->getValue());

  SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);

  SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
                                     MaskedValue, ShiftAmt);

  // Clear the destination bits (DstMask = ~(Mask << ShiftAmt)) and merge in
  // the new sub-word value.
  SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32,
                                DAG.getConstant(Mask, DL, MVT::i32),
                                ShiftAmt);
  DstMask = DAG.getNode(ISD::XOR, DL, MVT::i32, DstMask,
                        DAG.getConstant(0xffffffff, DL, MVT::i32));
  Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);

  // Write the merged dword back.
  SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
  return DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
                     Chain, Value, Ptr,
                     DAG.getTargetConstant(0, DL, MVT::i32));
}
1350
/// Custom lowering for ISD::STORE on R600.
///
/// Handling depends on the store's address space and type:
///  - LOCAL/PRIVATE vector stores are split into per-element stores.
///  - GLOBAL truncating stores of i8/i16 become a STORE_MSKOR
///    (mask + OR read-modify-write of the containing dword).
///  - Other GLOBAL stores of >= 32-bit values get their byte pointer
///    rewritten to a dword address (DWORDADDR).
///  - PRIVATE sub-dword stores go to lowerPrivateTruncStore; remaining
///    PRIVATE stores are lowered to REGISTER_STORE nodes addressing the
///    software stack, one node per vector element.
/// Returns SDValue() for cases this hook does not handle.
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  // Give the generic AMDGPU vector-store merging a chance first.
  if (SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG))
    return Result;

  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  unsigned AS = StoreNode->getAddressSpace();
  SDValue Value = StoreNode->getValue();
  EVT ValueVT = Value.getValueType();

  // Vector stores to local/private memory are scalarized.
  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
      ValueVT.isVector()) {
    return SplitVectorStore(Op, DAG);
  }

  SDLoc DL(Op);
  SDValue Chain = StoreNode->getChain();
  SDValue Ptr = StoreNode->getBasePtr();

  if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
    if (StoreNode->isTruncatingStore()) {
      // Sub-dword global store: emit a masked-OR so only the addressed
      // byte/halfword of the containing dword is modified.
      EVT VT = Value.getValueType();
      assert(VT.bitsLE(MVT::i32));
      EVT MemVT = StoreNode->getMemoryVT();
      SDValue MaskConstant;
      if (MemVT == MVT::i8) {
        MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
      } else {
        assert(MemVT == MVT::i16);
        MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
      }
      // Dword address = byte address / 4; byte index = low 2 bits.
      SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
                                      DAG.getConstant(2, DL, MVT::i32));
      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
                                      DAG.getConstant(0x00000003, DL, VT));
      SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
      // Bit shift = byte index * 8.
      SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
                                  DAG.getConstant(3, DL, VT));
      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
      // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
      // vector instead.
      SDValue Src[4] = {
        ShiftedValue,
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(0, DL, MVT::i32),
        Mask
      };
      SDValue Input = DAG.getBuildVector(MVT::v4i32, DL, Src);
      SDValue Args[3] = { Chain, Input, DWordAddr };
      return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
                                     Op->getVTList(), Args, MemVT,
                                     StoreNode->getMemOperand());
    } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
               ValueVT.bitsGE(MVT::i32)) {
      // Convert pointer from byte address to dword address.
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
                        DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
                                    Ptr, DAG.getConstant(2, DL, MVT::i32)));

      if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
        llvm_unreachable("Truncated and indexed stores not supported yet");
      } else {
        Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
      }
      return Chain;
    }
  }

  // Everything below handles private (stack) memory only.
  if (AS != AMDGPUAS::PRIVATE_ADDRESS)
    return SDValue();

  EVT MemVT = StoreNode->getMemoryVT();
  if (MemVT.bitsLT(MVT::i32))
    return lowerPrivateTruncStore(StoreNode, DAG);

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const R600FrameLowering *TFL = getSubtarget()->getFrameLowering();
  unsigned StackWidth = TFL->getStackWidth(MF);

  // Turn the byte-addressed stack pointer into a register index.
  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (ValueVT.isVector()) {
    // Emit one REGISTER_STORE per element, then tie them together with a
    // TokenFactor so the chain covers all of them.
    unsigned NumElemVT = ValueVT.getVectorNumElements();
    EVT ElemVT = ValueVT.getVectorElementType();
    SmallVector<SDValue, 4> Stores(NumElemVT);

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, DL, MVT::i32));
      SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
                                 Value, DAG.getConstant(i, DL, MVT::i32));

      Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
                              Chain, Elem, Ptr,
                              DAG.getTargetConstant(Channel, DL, MVT::i32));
    }
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
  } else {
    if (ValueVT == MVT::i8) {
      Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
    }
    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
                        DAG.getTargetConstant(0, DL, MVT::i32)); // Channel
  }

  return Chain;
}
1464
Tom Stellard365366f2013-01-23 02:09:06 +00001465// return (512 + (kc_bank << 12)
1466static int
1467ConstantAddressBlock(unsigned AddressSpace) {
1468 switch (AddressSpace) {
1469 case AMDGPUAS::CONSTANT_BUFFER_0:
1470 return 512;
1471 case AMDGPUAS::CONSTANT_BUFFER_1:
1472 return 512 + 4096;
1473 case AMDGPUAS::CONSTANT_BUFFER_2:
1474 return 512 + 4096 * 2;
1475 case AMDGPUAS::CONSTANT_BUFFER_3:
1476 return 512 + 4096 * 3;
1477 case AMDGPUAS::CONSTANT_BUFFER_4:
1478 return 512 + 4096 * 4;
1479 case AMDGPUAS::CONSTANT_BUFFER_5:
1480 return 512 + 4096 * 5;
1481 case AMDGPUAS::CONSTANT_BUFFER_6:
1482 return 512 + 4096 * 6;
1483 case AMDGPUAS::CONSTANT_BUFFER_7:
1484 return 512 + 4096 * 7;
1485 case AMDGPUAS::CONSTANT_BUFFER_8:
1486 return 512 + 4096 * 8;
1487 case AMDGPUAS::CONSTANT_BUFFER_9:
1488 return 512 + 4096 * 9;
1489 case AMDGPUAS::CONSTANT_BUFFER_10:
1490 return 512 + 4096 * 10;
1491 case AMDGPUAS::CONSTANT_BUFFER_11:
1492 return 512 + 4096 * 11;
1493 case AMDGPUAS::CONSTANT_BUFFER_12:
1494 return 512 + 4096 * 12;
1495 case AMDGPUAS::CONSTANT_BUFFER_13:
1496 return 512 + 4096 * 13;
1497 case AMDGPUAS::CONSTANT_BUFFER_14:
1498 return 512 + 4096 * 14;
1499 case AMDGPUAS::CONSTANT_BUFFER_15:
1500 return 512 + 4096 * 15;
1501 default:
1502 return -1;
1503 }
1504}
1505
Matt Arsenault6dfda962016-02-10 18:21:39 +00001506SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
1507 SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001508 SDLoc DL(Op);
Matt Arsenault6dfda962016-02-10 18:21:39 +00001509 LoadSDNode *Load = cast<LoadSDNode>(Op);
1510 ISD::LoadExtType ExtType = Load->getExtensionType();
1511 EVT MemVT = Load->getMemoryVT();
Tom Stellard365366f2013-01-23 02:09:06 +00001512
Matt Arsenault6dfda962016-02-10 18:21:39 +00001513 // <SI && AS=PRIVATE && EXTLOAD && size < 32bit,
1514 // register (2-)byte extract.
1515
1516 // Get Register holding the target.
1517 SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Load->getBasePtr(),
1518 DAG.getConstant(2, DL, MVT::i32));
1519 // Load the Register.
1520 SDValue Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(),
1521 Load->getChain(),
1522 Ptr,
1523 DAG.getTargetConstant(0, DL, MVT::i32),
1524 Op.getOperand(2));
1525
1526 // Get offset within the register.
1527 SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
1528 Load->getBasePtr(),
1529 DAG.getConstant(0x3, DL, MVT::i32));
1530
1531 // Bit offset of target byte (byteIdx * 8).
1532 SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1533 DAG.getConstant(3, DL, MVT::i32));
1534
1535 // Shift to the right.
1536 Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Ret, ShiftAmt);
1537
1538 // Eliminate the upper bits by setting them to ...
1539 EVT MemEltVT = MemVT.getScalarType();
1540
1541 // ... ones.
1542 if (ExtType == ISD::SEXTLOAD) {
1543 SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
1544
1545 SDValue Ops[] = {
1546 DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode),
1547 Load->getChain()
1548 };
1549
1550 return DAG.getMergeValues(Ops, DL);
1551 }
1552
1553 // ... or zeros.
1554 SDValue Ops[] = {
1555 DAG.getZeroExtendInReg(Ret, DL, MemEltVT),
1556 Load->getChain()
1557 };
1558
1559 return DAG.getMergeValues(Ops, DL);
1560}
1561
/// Custom lowering for ISD::LOAD on R600.
///
/// Handles, in order:
///  - PRIVATE sub-dword extending loads (delegated to lowerPrivateExtLoad).
///  - LOCAL vector loads (scalarized).
///  - Loads from the CONSTANT_BUFFER_* spaces, turned into CONST_ADDRESS
///    (kcache) accesses — folded to per-channel reads when the pointer is
///    a compile-time constant.
///  - SEXT loads, expanded to EXTLOAD + SIGN_EXTEND_INREG since only
///    CONSTANT_BUFFER_0 supports them natively.
///  - PRIVATE loads, lowered to REGISTER_LOAD from the software stack.
/// Returns SDValue() for anything else.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  unsigned AS = LoadNode->getAddressSpace();
  EVT MemVT = LoadNode->getMemoryVT();
  ISD::LoadExtType ExtType = LoadNode->getExtensionType();

  if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
      ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
    return lowerPrivateExtLoad(Op, DAG);
  }

  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue Chain = LoadNode->getChain();
  SDValue Ptr = LoadNode->getBasePtr();

  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      scalarizeVectorLoad(LoadNode, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  // Constant buffers only support non-extending and zero-extending loads
  // directly; SEXT loads fall through to the expansion below.
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
        isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getBuildVector(NewVT, DL, makeArrayRef(Slots, NumElements));
    } else {
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
                      DAG.getConstant(4, DL, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
          );
    }

    // Scalar loads take channel 0 of the v4i32 kcache read.
    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, DL, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  SDValue LoweredLoad;

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    SDValue NewLoad = DAG.getExtLoad(
        ISD::EXTLOAD, DL, VT, Chain, Ptr, LoadNode->getPointerInfo(), MemVT,
        LoadNode->getAlignment(), LoadNode->getMemOperand()->getFlags());
    SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
                              DAG.getValueType(MemVT));

    SDValue MergedValues[2] = { Res, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const R600FrameLowering *TFL = getSubtarget()->getFrameLowering();
  unsigned StackWidth = TFL->getStackWidth(MF);

  // Turn the byte-addressed stack pointer into a register index.
  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    // One REGISTER_LOAD per element, then rebuild the vector.
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT <= 4);
    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, DL, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, DL, MVT::i32),
                             Op.getOperand(2));
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElemVT);
    LoweredLoad = DAG.getBuildVector(TargetVT, DL, makeArrayRef(Loads, NumElemVT));
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, DL, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2] = {
    LoweredLoad,
    Chain
  };

  return DAG.getMergeValues(Ops, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001702
Matt Arsenault1d555c42014-06-23 18:00:55 +00001703SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1704 SDValue Chain = Op.getOperand(0);
1705 SDValue Cond = Op.getOperand(1);
1706 SDValue Jump = Op.getOperand(2);
1707
1708 return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1709 Chain, Jump, Cond);
1710}
1711
Matt Arsenault81d06012016-03-07 21:10:13 +00001712SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
1713 SelectionDAG &DAG) const {
1714 MachineFunction &MF = DAG.getMachineFunction();
Matt Arsenault43e92fe2016-06-24 06:30:11 +00001715 const R600FrameLowering *TFL = getSubtarget()->getFrameLowering();
Matt Arsenault81d06012016-03-07 21:10:13 +00001716
1717 FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
1718
1719 unsigned FrameIndex = FIN->getIndex();
1720 unsigned IgnoredFrameReg;
1721 unsigned Offset =
1722 TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
1723 return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), SDLoc(Op),
1724 Op.getValueType());
1725}
1726
Tom Stellard75aadc22012-12-11 21:25:42 +00001727/// XXX Only kernel functions are supported, so we can assume for now that
1728/// every function is a kernel function, but in the future we should use
1729/// separate calling conventions for kernel and non-kernel functions.
/// Lower incoming formal arguments.
///
/// For shader calling conventions, each argument is already live in a
/// 128-bit register and is simply copied out. For kernels, arguments are
/// loaded from CONSTANT_BUFFER_0, starting 36 bytes in (the first 36 bytes
/// hold thread-group and global size information). Also records the
/// running ABI argument offset in the machine function info.
SDValue R600TargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();

  SmallVector<ISD::InputArg, 8> LocalIns;

  // Recover the pre-legalization argument types so memory layout matches
  // the original function signature.
  getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);

  AnalyzeFormalArguments(CCInfo, LocalIns);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    const ISD::InputArg &In = Ins[i];
    EVT VT = In.VT;
    EVT MemVT = VA.getLocVT();
    if (!VT.isVector() && MemVT.isVector()) {
      // Get load source type if scalarized.
      MemVT = MemVT.getVectorElementType();
    }

    // Shader arguments arrive in registers, not in the constant buffer.
    if (AMDGPU::isShader(CallConv)) {
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
    // being invalid. Somehow this seems to work with i64 arguments, but breaks
    // for <1 x i64>.

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.
    ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
    if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
      // FIXME: This should really check the extload type, but the handling of
      // extload vector parameters seems to be broken.

      // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
      Ext = ISD::SEXTLOAD;
    }

    // Compute the offset from the value.
    // XXX - I think PartOffset should give you this, but it seems to give the
    // size of the register which isn't useful.

    unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
    unsigned PartOffset = VA.getLocMemOffset();
    // Skip the 36-byte dispatch header at the start of the buffer.
    unsigned Offset = 36 + VA.getLocMemOffset();

    MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
    SDValue Arg = DAG.getLoad(
        ISD::UNINDEXED, Ext, VT, DL, Chain,
        DAG.getConstant(Offset, DL, MVT::i32), DAG.getUNDEF(MVT::i32), PtrInfo,
        MemVT, /* Alignment = */ 4,
        MachineMemOperand::MONonTemporal | MachineMemOperand::MOInvariant);

    // 4 is the preferred alignment for the CONSTANT memory space.
    InVals.push_back(Arg);
    // Advance the ABI offset past this argument's storage.
    MFI->ABIArgOffset = Offset + MemVT.getStoreSize();
  }
  return Chain;
}
1803
Mehdi Amini44ede332015-07-09 02:09:04 +00001804EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1805 EVT VT) const {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001806 if (!VT.isVector())
1807 return MVT::i32;
Tom Stellard75aadc22012-12-11 21:25:42 +00001808 return VT.changeVectorElementTypeToInteger();
1809}
1810
Matt Arsenaultfa67bdb2016-02-22 21:04:16 +00001811bool R600TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
1812 unsigned AddrSpace,
1813 unsigned Align,
1814 bool *IsFast) const {
1815 if (IsFast)
1816 *IsFast = false;
1817
1818 if (!VT.isSimple() || VT == MVT::Other)
1819 return false;
1820
1821 if (VT.bitsLT(MVT::i32))
1822 return false;
1823
1824 // TODO: This is a rough estimate.
1825 if (IsFast)
1826 *IsFast = true;
1827
1828 return VT.bitsGT(MVT::i32) && Align % 4 == 0;
1829}
1830
Matt Arsenault209a7b92014-04-18 07:40:20 +00001831static SDValue CompactSwizzlableVector(
1832 SelectionDAG &DAG, SDValue VectorEntry,
1833 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001834 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1835 assert(RemapSwizzle.empty());
1836 SDValue NewBldVec[4] = {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001837 VectorEntry.getOperand(0),
1838 VectorEntry.getOperand(1),
1839 VectorEntry.getOperand(2),
1840 VectorEntry.getOperand(3)
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001841 };
1842
1843 for (unsigned i = 0; i < 4; i++) {
Sanjay Patel57195842016-03-14 17:28:46 +00001844 if (NewBldVec[i].isUndef())
Vincent Lejeunefa58a5f2013-10-13 17:56:10 +00001845 // We mask write here to teach later passes that the ith element of this
1846 // vector is undef. Thus we can use it to reduce 128 bits reg usage,
1847 // break false dependencies and additionnaly make assembly easier to read.
1848 RemapSwizzle[i] = 7; // SEL_MASK_WRITE
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001849 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1850 if (C->isZero()) {
1851 RemapSwizzle[i] = 4; // SEL_0
1852 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1853 } else if (C->isExactlyValue(1.0)) {
1854 RemapSwizzle[i] = 5; // SEL_1
1855 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1856 }
1857 }
1858
Sanjay Patel57195842016-03-14 17:28:46 +00001859 if (NewBldVec[i].isUndef())
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001860 continue;
1861 for (unsigned j = 0; j < i; j++) {
1862 if (NewBldVec[i] == NewBldVec[j]) {
1863 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1864 RemapSwizzle[i] = j;
1865 break;
1866 }
1867 }
1868 }
1869
Ahmed Bougacha128f8732016-04-26 21:15:30 +00001870 return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
1871 NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001872}
1873
Benjamin Kramer193960c2013-06-11 13:32:25 +00001874static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1875 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001876 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1877 assert(RemapSwizzle.empty());
1878 SDValue NewBldVec[4] = {
1879 VectorEntry.getOperand(0),
1880 VectorEntry.getOperand(1),
1881 VectorEntry.getOperand(2),
1882 VectorEntry.getOperand(3)
1883 };
1884 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001885 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001886 RemapSwizzle[i] = i;
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001887 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1888 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1889 ->getZExtValue();
1890 if (i == Idx)
1891 isUnmovable[Idx] = true;
1892 }
1893 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001894
1895 for (unsigned i = 0; i < 4; i++) {
1896 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1897 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1898 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001899 if (isUnmovable[Idx])
1900 continue;
1901 // Swap i and Idx
1902 std::swap(NewBldVec[Idx], NewBldVec[i]);
1903 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1904 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001905 }
1906 }
1907
Ahmed Bougacha128f8732016-04-26 21:15:30 +00001908 return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
1909 NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001910}
1911
Benjamin Kramerbdc49562016-06-12 15:39:02 +00001912SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[4],
1913 SelectionDAG &DAG,
1914 const SDLoc &DL) const {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001915 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1916 // Old -> New swizzle values
1917 DenseMap<unsigned, unsigned> SwizzleRemap;
1918
1919 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1920 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001921 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001922 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001923 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001924 }
1925
1926 SwizzleRemap.clear();
1927 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1928 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001929 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001930 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001931 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001932 }
1933
1934 return BuildVector;
1935}
1936
1937
Tom Stellard75aadc22012-12-11 21:25:42 +00001938//===----------------------------------------------------------------------===//
1939// Custom DAG Optimizations
1940//===----------------------------------------------------------------------===//
1941
1942SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1943 DAGCombinerInfo &DCI) const {
1944 SelectionDAG &DAG = DCI.DAG;
1945
1946 switch (N->getOpcode()) {
Tom Stellard50122a52014-04-07 19:45:41 +00001947 default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00001948 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1949 case ISD::FP_ROUND: {
1950 SDValue Arg = N->getOperand(0);
1951 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001952 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001953 Arg.getOperand(0));
1954 }
1955 break;
1956 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001957
1958 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1959 // (i32 select_cc f32, f32, -1, 0 cc)
1960 //
1961 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1962 // this to one of the SET*_DX10 instructions.
1963 case ISD::FP_TO_SINT: {
1964 SDValue FNeg = N->getOperand(0);
1965 if (FNeg.getOpcode() != ISD::FNEG) {
1966 return SDValue();
1967 }
1968 SDValue SelectCC = FNeg.getOperand(0);
1969 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1970 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1971 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1972 !isHWTrueValue(SelectCC.getOperand(2)) ||
1973 !isHWFalseValue(SelectCC.getOperand(3))) {
1974 return SDValue();
1975 }
1976
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001977 SDLoc dl(N);
1978 return DAG.getNode(ISD::SELECT_CC, dl, N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001979 SelectCC.getOperand(0), // LHS
1980 SelectCC.getOperand(1), // RHS
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001981 DAG.getConstant(-1, dl, MVT::i32), // True
1982 DAG.getConstant(0, dl, MVT::i32), // False
Tom Stellarde06163a2013-02-07 14:02:35 +00001983 SelectCC.getOperand(4)); // CC
1984
1985 break;
1986 }
Quentin Colombete2e05482013-07-30 00:27:16 +00001987
NAKAMURA Takumi8a046432013-10-28 04:07:38 +00001988 // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1989 // => build_vector elt0, ... , NewEltIdx, ... , eltN
Quentin Colombete2e05482013-07-30 00:27:16 +00001990 case ISD::INSERT_VECTOR_ELT: {
1991 SDValue InVec = N->getOperand(0);
1992 SDValue InVal = N->getOperand(1);
1993 SDValue EltNo = N->getOperand(2);
1994 SDLoc dl(N);
1995
1996 // If the inserted element is an UNDEF, just use the input vector.
Sanjay Patel57195842016-03-14 17:28:46 +00001997 if (InVal.isUndef())
Quentin Colombete2e05482013-07-30 00:27:16 +00001998 return InVec;
1999
2000 EVT VT = InVec.getValueType();
2001
2002 // If we can't generate a legal BUILD_VECTOR, exit
2003 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
2004 return SDValue();
2005
2006 // Check that we know which element is being inserted
2007 if (!isa<ConstantSDNode>(EltNo))
2008 return SDValue();
2009 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
2010
2011 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
2012 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
2013 // vector elements.
2014 SmallVector<SDValue, 8> Ops;
2015 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
2016 Ops.append(InVec.getNode()->op_begin(),
2017 InVec.getNode()->op_end());
Sanjay Patel57195842016-03-14 17:28:46 +00002018 } else if (InVec.isUndef()) {
Quentin Colombete2e05482013-07-30 00:27:16 +00002019 unsigned NElts = VT.getVectorNumElements();
2020 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
2021 } else {
2022 return SDValue();
2023 }
2024
2025 // Insert the element
2026 if (Elt < Ops.size()) {
2027 // All the operands of BUILD_VECTOR must have the same type;
2028 // we enforce that here.
2029 EVT OpVT = Ops[0].getValueType();
2030 if (InVal.getValueType() != OpVT)
2031 InVal = OpVT.bitsGT(InVal.getValueType()) ?
2032 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
2033 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
2034 Ops[Elt] = InVal;
2035 }
2036
2037 // Return the new vector
Ahmed Bougacha128f8732016-04-26 21:15:30 +00002038 return DAG.getBuildVector(VT, dl, Ops);
Quentin Colombete2e05482013-07-30 00:27:16 +00002039 }
2040
Tom Stellard365366f2013-01-23 02:09:06 +00002041 // Extract_vec (Build_vector) generated by custom lowering
2042 // also needs to be customly combined
2043 case ISD::EXTRACT_VECTOR_ELT: {
2044 SDValue Arg = N->getOperand(0);
2045 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
2046 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2047 unsigned Element = Const->getZExtValue();
2048 return Arg->getOperand(Element);
2049 }
2050 }
Tom Stellarddd04c832013-01-31 22:11:53 +00002051 if (Arg.getOpcode() == ISD::BITCAST &&
2052 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
2053 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2054 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00002055 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00002056 Arg->getOperand(0).getOperand(Element));
2057 }
2058 }
Mehdi Aminie029eae2015-07-16 06:23:12 +00002059 break;
Tom Stellard365366f2013-01-23 02:09:06 +00002060 }
Tom Stellarde06163a2013-02-07 14:02:35 +00002061
2062 case ISD::SELECT_CC: {
Tom Stellardafa8b532014-05-09 16:42:16 +00002063 // Try common optimizations
Ahmed Bougachaf8dfb472016-02-09 22:54:12 +00002064 if (SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI))
Tom Stellardafa8b532014-05-09 16:42:16 +00002065 return Ret;
2066
Tom Stellarde06163a2013-02-07 14:02:35 +00002067 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
2068 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00002069 //
2070 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
2071 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00002072 SDValue LHS = N->getOperand(0);
2073 if (LHS.getOpcode() != ISD::SELECT_CC) {
2074 return SDValue();
2075 }
2076
2077 SDValue RHS = N->getOperand(1);
2078 SDValue True = N->getOperand(2);
2079 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00002080 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00002081
2082 if (LHS.getOperand(2).getNode() != True.getNode() ||
2083 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00002084 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00002085 return SDValue();
2086 }
2087
Tom Stellard5e524892013-03-08 15:37:11 +00002088 switch (NCC) {
2089 default: return SDValue();
2090 case ISD::SETNE: return LHS;
2091 case ISD::SETEQ: {
2092 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
2093 LHSCC = ISD::getSetCCInverse(LHSCC,
2094 LHS.getOperand(0).getValueType().isInteger());
Tom Stellardcd428182013-09-28 02:50:38 +00002095 if (DCI.isBeforeLegalizeOps() ||
2096 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
2097 return DAG.getSelectCC(SDLoc(N),
2098 LHS.getOperand(0),
2099 LHS.getOperand(1),
2100 LHS.getOperand(2),
2101 LHS.getOperand(3),
2102 LHSCC);
2103 break;
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002104 }
Tom Stellard5e524892013-03-08 15:37:11 +00002105 }
Tom Stellardcd428182013-09-28 02:50:38 +00002106 return SDValue();
Tom Stellard5e524892013-03-08 15:37:11 +00002107 }
Tom Stellardfbab8272013-08-16 01:12:11 +00002108
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002109 case AMDGPUISD::EXPORT: {
2110 SDValue Arg = N->getOperand(1);
2111 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2112 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002113
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002114 SDValue NewArgs[8] = {
2115 N->getOperand(0), // Chain
2116 SDValue(),
2117 N->getOperand(2), // ArrayBase
2118 N->getOperand(3), // Type
2119 N->getOperand(4), // SWZ_X
2120 N->getOperand(5), // SWZ_Y
2121 N->getOperand(6), // SWZ_Z
2122 N->getOperand(7) // SWZ_W
2123 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00002124 SDLoc DL(N);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002125 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
Craig Topper48d114b2014-04-26 18:35:24 +00002126 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
Tom Stellarde06163a2013-02-07 14:02:35 +00002127 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002128 case AMDGPUISD::TEXTURE_FETCH: {
2129 SDValue Arg = N->getOperand(1);
2130 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2131 break;
2132
2133 SDValue NewArgs[19] = {
2134 N->getOperand(0),
2135 N->getOperand(1),
2136 N->getOperand(2),
2137 N->getOperand(3),
2138 N->getOperand(4),
2139 N->getOperand(5),
2140 N->getOperand(6),
2141 N->getOperand(7),
2142 N->getOperand(8),
2143 N->getOperand(9),
2144 N->getOperand(10),
2145 N->getOperand(11),
2146 N->getOperand(12),
2147 N->getOperand(13),
2148 N->getOperand(14),
2149 N->getOperand(15),
2150 N->getOperand(16),
2151 N->getOperand(17),
2152 N->getOperand(18),
2153 };
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002154 SDLoc DL(N);
2155 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
2156 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002157 }
Tom Stellard75aadc22012-12-11 21:25:42 +00002158 }
Matt Arsenault5565f65e2014-05-22 18:09:07 +00002159
2160 return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00002161}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002162
Matt Arsenault43e92fe2016-06-24 06:30:11 +00002163bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
2164 SDValue &Src, SDValue &Neg, SDValue &Abs,
2165 SDValue &Sel, SDValue &Imm,
2166 SelectionDAG &DAG) const {
2167 const R600InstrInfo *TII = getSubtarget()->getInstrInfo();
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002168 if (!Src.isMachineOpcode())
2169 return false;
Matt Arsenault43e92fe2016-06-24 06:30:11 +00002170
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002171 switch (Src.getMachineOpcode()) {
2172 case AMDGPU::FNEG_R600:
2173 if (!Neg.getNode())
2174 return false;
2175 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002176 Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002177 return true;
2178 case AMDGPU::FABS_R600:
2179 if (!Abs.getNode())
2180 return false;
2181 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002182 Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002183 return true;
2184 case AMDGPU::CONST_COPY: {
2185 unsigned Opcode = ParentNode->getMachineOpcode();
2186 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2187
2188 if (!Sel.getNode())
2189 return false;
2190
2191 SDValue CstOffset = Src.getOperand(0);
2192 if (ParentNode->getValueType(0).isVector())
2193 return false;
2194
2195 // Gather constants values
2196 int SrcIndices[] = {
2197 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2198 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2199 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
2200 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2201 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2202 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2203 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2204 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2205 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2206 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2207 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
2208 };
2209 std::vector<unsigned> Consts;
Matt Arsenault4d64f962014-05-12 19:23:21 +00002210 for (int OtherSrcIdx : SrcIndices) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002211 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2212 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2213 continue;
2214 if (HasDst) {
2215 OtherSrcIdx--;
2216 OtherSelIdx--;
2217 }
2218 if (RegisterSDNode *Reg =
2219 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2220 if (Reg->getReg() == AMDGPU::ALU_CONST) {
Matt Arsenaultb3ee3882014-05-12 19:26:38 +00002221 ConstantSDNode *Cst
2222 = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002223 Consts.push_back(Cst->getZExtValue());
2224 }
2225 }
2226 }
2227
Matt Arsenault37c12d72014-05-12 20:42:57 +00002228 ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002229 Consts.push_back(Cst->getZExtValue());
2230 if (!TII->fitsConstReadLimitations(Consts)) {
2231 return false;
2232 }
2233
2234 Sel = CstOffset;
2235 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
2236 return true;
2237 }
Jan Vesely16800392016-05-13 20:39:31 +00002238 case AMDGPU::MOV_IMM_GLOBAL_ADDR:
2239 // Check if the Imm slot is used. Taken from below.
2240 if (cast<ConstantSDNode>(Imm)->getZExtValue())
2241 return false;
2242 Imm = Src.getOperand(0);
2243 Src = DAG.getRegister(AMDGPU::ALU_LITERAL_X, MVT::i32);
2244 return true;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002245 case AMDGPU::MOV_IMM_I32:
2246 case AMDGPU::MOV_IMM_F32: {
2247 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
2248 uint64_t ImmValue = 0;
2249
2250
2251 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
2252 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
2253 float FloatValue = FPC->getValueAPF().convertToFloat();
2254 if (FloatValue == 0.0) {
2255 ImmReg = AMDGPU::ZERO;
2256 } else if (FloatValue == 0.5) {
2257 ImmReg = AMDGPU::HALF;
2258 } else if (FloatValue == 1.0) {
2259 ImmReg = AMDGPU::ONE;
2260 } else {
2261 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2262 }
2263 } else {
2264 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
2265 uint64_t Value = C->getZExtValue();
2266 if (Value == 0) {
2267 ImmReg = AMDGPU::ZERO;
2268 } else if (Value == 1) {
2269 ImmReg = AMDGPU::ONE_INT;
2270 } else {
2271 ImmValue = Value;
2272 }
2273 }
2274
2275 // Check that we aren't already using an immediate.
2276 // XXX: It's possible for an instruction to have more than one
2277 // immediate operand, but this is not supported yet.
2278 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
2279 if (!Imm.getNode())
2280 return false;
2281 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
2282 assert(C);
2283 if (C->getZExtValue())
2284 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002285 Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002286 }
2287 Src = DAG.getRegister(ImmReg, MVT::i32);
2288 return true;
2289 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002290 default:
2291 return false;
2292 }
2293}
2294
/// \brief Fold the instructions after selecting them
///
/// Post-instruction-selection hook. For each machine node this tries, via
/// FoldOperand(), to fold source modifiers (neg/abs), constant-buffer reads
/// and literal immediates into the node's operand/modifier slots, and to
/// merge CLAMP_R600 into its source instruction's clamp modifier. Returns
/// either the unchanged node or a freshly built machine node carrying the
/// mutated operand list.
SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
                                            SelectionDAG &DAG) const {
  const R600InstrInfo *TII = getSubtarget()->getInstrInfo();
  if (!Node->isMachineOpcode())
    return Node;

  unsigned Opcode = Node->getMachineOpcode();
  // FakeOp stands in for operand slots an instruction does not have; its
  // node pointer is null, which FoldOperand treats as "slot unavailable".
  SDValue FakeOp;

  // Mutable copy of the operand list; FoldOperand edits entries in place
  // through the references taken below.
  std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());

  if (Opcode == AMDGPU::DOT_4) {
    // DOT_4 has eight scalar sources (src0/src1 x XYZW), each with its own
    // neg and abs modifier operands.
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
    };
    for (unsigned i = 0; i < 8; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      // The "- 1" converts getOperandIdx results (which count the explicit
      // dst operand) to SDNode operand-list indices, as demonstrated by the
      // HasDst/SelIdx-- adjustment below.
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue &Abs = Ops[AbsIdx[i] - 1];
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      if (HasDst)
        SelIdx--;
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      // DOT_4 has no literal slot, so a null FakeOp is passed for Imm.
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::REG_SEQUENCE) {
    // REG_SEQUENCE operands alternate (value, subreg-index) starting at 1;
    // only the value operands are candidates for folding.
    for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
      SDValue &Src = Ops[i];
      if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::CLAMP_R600) {
    // Fold the clamp pseudo into its source instruction's clamp modifier,
    // replacing CLAMP_R600 with a copy of the source that has clamp set.
    SDValue Src = Node->getOperand(0);
    if (!Src.isMachineOpcode() ||
        !TII->hasInstrModifiers(Src.getMachineOpcode()))
      return Node;
    int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
        AMDGPU::OpName::clamp);
    if (ClampIdx < 0)
      return Node;
    SDLoc DL(Node);
    // Deliberately shadows the outer Ops: this operand list belongs to the
    // *source* instruction, which is the node being rebuilt here.
    std::vector<SDValue> Ops(Src->op_begin(), Src->op_end());
    Ops[ClampIdx - 1] = DAG.getTargetConstant(1, DL, MVT::i32);
    return DAG.getMachineNode(Src.getMachineOpcode(), DL,
        Node->getVTList(), Ops);
  } else {
    // Generic single-ALU instruction: up to three sources, each with a neg
    // modifier; only src0/src1 have abs modifiers (hence the -1 sentinel).
    if (!TII->hasInstrModifiers(Opcode))
      return Node;
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
      -1
    };
    for (unsigned i = 0; i < 3; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue FakeAbs;
      SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
      if (HasDst) {
        // Convert dst-counting indices to SDNode operand-list indices.
        SelIdx--;
        ImmIdx--;
      }
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      SDValue &Imm = Ops[ImmIdx];
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  }

  return Node;
}