//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "AMDGPUFrameLowering.h"
#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Function.h"

using namespace llvm;

R600TargetLowering::R600TargetLowering(TargetMachine &TM,
                                       const AMDGPUSubtarget &STI)
    : AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) {
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
  addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);

  computeRegisterProperties(STI.getRegisterInfo());

  // Legalize loads and stores to the private address space.
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);

  // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
  // spaces, so it is custom lowered to handle those where it isn't.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
  }

  // Workaround for LegalizeDAG asserting on expansion of i1 vector loads.
  setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i32, MVT::v2i1, Expand);

  setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i1, Expand);

  setOperationAction(ISD::STORE, MVT::i8, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);

  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
  setTruncStoreAction(MVT::i32, MVT::i16, Custom);

  // Workaround for LegalizeDAG asserting on expansion of i1 vector stores.
  setTruncStoreAction(MVT::v2i32, MVT::v2i1, Expand);
  setTruncStoreAction(MVT::v4i32, MVT::v4i1, Expand);

  // Set condition code actions
  setCondCodeAction(ISD::SETO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::f32, Expand);

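  // Note (added comment): only the EQ/NE and "greater" (GE/GT, signed and
  // unsigned) integer compares appear to be natively available; expanding the
  // rest lets the legalizer reach them by swapping or inverting operands.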
  setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::i32, Expand);

  setOperationAction(ISD::FCOS, MVT::f32, Custom);
  setOperationAction(ISD::FSIN, MVT::f32, Custom);

  setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
  setOperationAction(ISD::SETCC, MVT::v2i32, Expand);

  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
  setOperationAction(ISD::SELECT, MVT::v4i32, Expand);

  // ADD, SUB overflow.
  // TODO: turn these into Legal?
  if (Subtarget->hasCARRY())
    setOperationAction(ISD::UADDO, MVT::i32, Custom);

  if (Subtarget->hasBORROW())
    setOperationAction(ISD::USUBO, MVT::i32, Custom);

  // Expand sign extension of vectors
  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);

  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);

  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);

  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

  // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
  // to be Legal/Custom in order to avoid library calls.
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);

  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
  for (MVT VT : ScalarIntVTs) {
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::SUBE, VT, Expand);
  }

  setSchedulingPreference(Sched::Source);

  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::FP_TO_SINT);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::SELECT_CC);
  setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
}

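// An instruction is at the "end of program" (EOP) when it is immediately
// followed by the RETURN pseudo. The custom inserters below use this to set
// the EOP bit on export and RAT (memory write) instructions.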
static inline bool isEOP(MachineBasicBlock::iterator I) {
  return std::next(I)->getOpcode() == AMDGPU::RETURN;
}

MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(Subtarget->getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // Replace LDS_*_RET instructions that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI->getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
      // LDS_1A2D support and remove this special case.
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()) ||
          MI->getOpcode() == AMDGPU::LDS_CMPST_RET)
        return BB;

      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::MOV_IMM_GLOBAL_ADDR: {
    // TODO: Perhaps combine this instruction with the next if possible.
    auto MIB = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
                                            MI->getOperand(0).getReg(),
                                            AMDGPU::ALU_LITERAL_X);
    int Idx = TII->getOperandIdx(*MIB, AMDGPU::OpName::literal);
    // TODO: Ugh, this is rather ugly.
    MIB->getOperand(Idx) = MI->getOperand(1);
    break;
  }
  case AMDGPU::CONST_COPY: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
                       MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(isEOP(I)); // Set End of program bit
    break;
  }
  case AMDGPU::RAT_STORE_TYPED_eg: {
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addOperand(MI->getOperand(2))
        .addImm(isEOP(I)); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

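    // Added comment: the texture target determines how the source swizzle and
    // the coordinate-type flags below are set up (CT* = 0 presumably marks a
    // component as unnormalized; shadow variants feed the W slot from Z).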
    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
        .addOperand(MI->getOperand(3))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
        .addOperand(MI->getOperand(2))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW)
        .addReg(T0, RegState::Implicit)
        .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
        .addOperand(MI->getOperand(3))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
        .addOperand(MI->getOperand(2))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW)
        .addReg(T0, RegState::Implicit)
        .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
        .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .addOperand(MI->getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO_INT)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .addOperand(MI->getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if it's not the last one of its type.
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = isEOP(I);
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
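    // Added comment: 84 and 40 are, as far as I can tell, the EXPORT_DONE
    // control-flow opcodes for Evergreen and R600 respectively.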
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addOperand(MI->getOperand(2))
        .addOperand(MI->getOperand(3))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5))
        .addOperand(MI->getOperand(6))
        .addImm(CfInst)
        .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  MI->eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
  case ISD::SRA_PARTS:
  case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
  case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
  case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
  case ISD::FCOS:
  case ISD::FSIN: return LowerTrig(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: {
    SDValue Result = LowerLOAD(Op, DAG);
    assert((!Result.getNode() ||
            Result.getNode()->getNumValues() == 2) &&
           "Load should return a value and a chain");
    return Result;
  }

  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::R600_store_swizzle: {
      SDLoc DL(Op);
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, DL, MVT::i32), // SWZ_X
        DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
        DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
        DAG.getConstant(3, DL, MVT::i32)  // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::EXPORT, DL, Op.getValueType(), Args);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch(IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_interp_xy:
    case AMDGPUIntrinsic::R600_interp_zw: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      MachineSDNode *interp;
      SDValue RegisterINode = Op.getOperand(2);
      SDValue RegisterJNode = Op.getOperand(3);

      if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
            RegisterJNode, RegisterINode);
      return DAG.getBuildVector(MVT::v2f32, DL,
                                {SDValue(interp, 0), SDValue(interp, 1)});
    }
    case AMDGPUIntrinsic::r600_tex:
    case AMDGPUIntrinsic::r600_texc:
    case AMDGPUIntrinsic::r600_txl:
    case AMDGPUIntrinsic::r600_txlc:
    case AMDGPUIntrinsic::r600_txb:
    case AMDGPUIntrinsic::r600_txbc:
    case AMDGPUIntrinsic::r600_txf:
    case AMDGPUIntrinsic::r600_txq:
    case AMDGPUIntrinsic::r600_ddx:
    case AMDGPUIntrinsic::r600_ddy:
    case AMDGPUIntrinsic::R600_ldptr: {
      unsigned TextureOp;
      switch (IntrinsicID) {
      case AMDGPUIntrinsic::r600_tex:
        TextureOp = 0;
        break;
      case AMDGPUIntrinsic::r600_texc:
        TextureOp = 1;
        break;
      case AMDGPUIntrinsic::r600_txl:
        TextureOp = 2;
        break;
      case AMDGPUIntrinsic::r600_txlc:
        TextureOp = 3;
        break;
      case AMDGPUIntrinsic::r600_txb:
        TextureOp = 4;
        break;
      case AMDGPUIntrinsic::r600_txbc:
        TextureOp = 5;
        break;
      case AMDGPUIntrinsic::r600_txf:
        TextureOp = 6;
        break;
      case AMDGPUIntrinsic::r600_txq:
        TextureOp = 7;
        break;
      case AMDGPUIntrinsic::r600_ddx:
        TextureOp = 8;
        break;
      case AMDGPUIntrinsic::r600_ddy:
        TextureOp = 9;
        break;
      case AMDGPUIntrinsic::R600_ldptr:
        TextureOp = 10;
        break;
      default:
        llvm_unreachable("Unknown Texture Operation");
      }

      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, DL, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(1, DL, MVT::i32),
        DAG.getConstant(2, DL, MVT::i32),
        DAG.getConstant(3, DL, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(1, DL, MVT::i32),
        DAG.getConstant(2, DL, MVT::i32),
        DAG.getConstant(3, DL, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
    }
    case AMDGPUIntrinsic::AMDGPU_dp4: {
      SDValue Args[8] = {
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
                    DAG.getConstant(0, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
                    DAG.getConstant(0, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
                    DAG.getConstant(1, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
                    DAG.getConstant(1, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
                    DAG.getConstant(2, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
                    DAG.getConstant(2, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
                    DAG.getConstant(3, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
                    DAG.getConstant(3, DL, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
    }

    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    case Intrinsic::r600_read_workdim:
    case AMDGPUIntrinsic::AMDGPU_read_workdim: { // Legacy name.
      uint32_t ByteOffset = getImplicitParameterOffset(MFI, GRID_DIM);
      return LowerImplicitParameter(DAG, VT, DL, ByteOffset / 4);
    }

    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);

    // FIXME: Should be renamed to r600 prefix
    case AMDGPUIntrinsic::AMDGPU_rsq_clamped:
      return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));

    case Intrinsic::r600_rsq:
    case AMDGPUIntrinsic::AMDGPU_rsq: // Legacy name
      // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
      return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
    return;
  case ISD::FP_TO_UINT:
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
      return;
    }
    // Fall-through. Since we don't care about out of bounds values
    // we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint
    // considers some extra cases which are not necessary here.
  case ISD::FP_TO_SINT: {
    SDValue Result;
    if (expandFP_TO_SINT(N, Result, DAG))
      Results.push_back(Result);
    return;
  }
  case ISD::SDIVREM: {
    SDValue Op = SDValue(N, 1);
    SDValue RES = LowerSDIVREM(Op, DAG);
    Results.push_back(RES);
    Results.push_back(RES.getValue(1));
    break;
  }
  case ISD::UDIVREM: {
    SDValue Op = SDValue(N, 0);
    LowerUDIVREM64(Op, DAG, Results);
    break;
  }
  }
}

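// Rebuild a vector as an AMDGPUISD::BUILD_VERTICAL_VECTOR of its elements.
// LowerEXTRACT_VECTOR_ELT / LowerINSERT_VECTOR_ELT below use this when the
// element index is not a compile-time constant.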
SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
                                                   SDValue Vector) const {

  SDLoc DL(Vector);
  EVT VecVT = Vector.getValueType();
  EVT EltVT = VecVT.getVectorElementType();
  SmallVector<SDValue, 8> Args;

  for (unsigned i = 0, e = VecVT.getVectorNumElements();
       i != e; ++i) {
    Args.push_back(DAG.getNode(
        ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
        DAG.getConstant(i, DL, getVectorIdxTy(DAG.getDataLayout()))));
  }

  return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
}

SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                    SelectionDAG &DAG) const {

  SDLoc DL(Op);
  SDValue Vector = Op.getOperand(0);
  SDValue Index = Op.getOperand(1);

  if (isa<ConstantSDNode>(Index) ||
      Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    return Op;

  Vector = vectorToVerticalVector(DAG, Vector);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
                     Vector, Index);
}

SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vector = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Index = Op.getOperand(2);

  if (isa<ConstantSDNode>(Index) ||
      Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    return Op;

  Vector = vectorToVerticalVector(DAG, Vector);
  SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
                               Vector, Value, Index);
  return vectorToVerticalVector(DAG, Insert);
}

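// Globals in the constant address space are materialized as an
// AMDGPUISD::CONST_DATA_PTR to the target global address; everything else
// falls back to the generic AMDGPU lowering.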
SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
                                               SDValue Op,
                                               SelectionDAG &DAG) const {

  GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
  if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
    return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);

  const DataLayout &DL = DAG.getDataLayout();
  const GlobalValue *GV = GSD->getGlobal();
  MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);

  SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
  return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
}

SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
  // On hw >= R700, COS/SIN input must be between -1. and 1.
  // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
  EVT VT = Op.getValueType();
  SDValue Arg = Op.getOperand(0);
  SDLoc DL(Op);

  // TODO: Should this propagate fast-math-flags?
  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT,
          DAG.getNode(ISD::FMUL, DL, VT, Arg,
              DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
          DAG.getConstantFP(0.5, DL, MVT::f32)));
  unsigned TrigNode;
  switch (Op.getOpcode()) {
  case ISD::FCOS:
    TrigNode = AMDGPUISD::COS_HW;
    break;
  case ISD::FSIN:
    TrigNode = AMDGPUISD::SIN_HW;
    break;
  default:
    llvm_unreachable("Wrong trig opcode");
  }
  SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT, FractPart,
          DAG.getConstantFP(-0.5, DL, MVT::f32)));
  if (Gen >= AMDGPUSubtarget::R700)
    return TrigVal;
  // On R600 hw, COS/SIN input must be between -Pi and Pi.
  return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
                     DAG.getConstantFP(3.14159265359, DL, MVT::f32));
}

SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);

  SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for the 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps; the alternative is to
  // add a conditional to filter the special case.

  SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
  Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);

  SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
  HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
  SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);

  SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
  SDValue LoBig = Zero;

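  // Choose between the "small" (Shift < Width) and "big" (Shift >= Width)
  // shift results.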
  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
}

SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);

  const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;

  SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for the 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps; the alternative is to
  // add a conditional to filter the special case.

  SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
  Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);

  SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
  SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
  LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);

  SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
  SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;

  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
}

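// Lowers UADDO/USUBO: 'mainop' is the plain ISD::ADD/ISD::SUB result and
// 'ovf' is the AMDGPUISD::CARRY/BORROW node that computes the overflow bit.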
SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
                                          unsigned mainop, unsigned ovf) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);

  SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
  // Extend sign.
  OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
                    DAG.getValueType(MVT::i1));

  SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
}

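// FP_TO_UINT with an i1 result only has to distinguish zero from non-zero,
// so a SETNE compare against 0.0 is enough.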
SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(
      ISD::SETCC,
      DL,
      MVT::i1,
      Op, DAG.getConstantFP(0.0f, DL, MVT::f32),
      DAG.getCondCode(ISD::SETNE)
      );
}

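// Implicit kernel parameters (ngroups, global/local sizes, work dim, ...) are
// read from constant buffer 0; DwordOffset selects the 32-bit word to load.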
Tom Stellard75aadc22012-12-11 21:25:42 +00001079SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001080 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001081 unsigned DwordOffset) const {
1082 unsigned ByteOffset = DwordOffset * 4;
1083 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +00001084 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +00001085
1086 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
1087 assert(isInt<16>(ByteOffset));
1088
1089 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001090 DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
Tom Stellard75aadc22012-12-11 21:25:42 +00001091 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
1092 false, false, false, 0);
1093}
1094
Tom Stellard75aadc22012-12-11 21:25:42 +00001095bool R600TargetLowering::isZero(SDValue Op) const {
1096 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
1097 return Cst->isNullValue();
1098 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
1099 return CstFP->isZero();
1100 } else {
1101 return false;
1102 }
1103}
1104
Matt Arsenault6b6a2c32016-03-11 08:00:27 +00001105bool R600TargetLowering::isHWTrueValue(SDValue Op) const {
1106 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
1107 return CFP->isExactlyValue(1.0);
1108 }
1109 return isAllOnesConstant(Op);
1110}
1111
1112bool R600TargetLowering::isHWFalseValue(SDValue Op) const {
1113 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
1114 return CFP->getValueAPF().isZero();
1115 }
1116 return isNullConstant(Op);
1117}
1118
Tom Stellard75aadc22012-12-11 21:25:42 +00001119SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001120 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001121 EVT VT = Op.getValueType();
1122
1123 SDValue LHS = Op.getOperand(0);
1124 SDValue RHS = Op.getOperand(1);
1125 SDValue True = Op.getOperand(2);
1126 SDValue False = Op.getOperand(3);
1127 SDValue CC = Op.getOperand(4);
1128 SDValue Temp;
1129
Matt Arsenault1e3a4eb2014-12-12 02:30:37 +00001130 if (VT == MVT::f32) {
1131 DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
1132 SDValue MinMax = CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
1133 if (MinMax)
1134 return MinMax;
1135 }
1136
Tom Stellard75aadc22012-12-11 21:25:42 +00001137 // LHS and RHS are guaranteed to be the same value type
1138 EVT CompareVT = LHS.getValueType();
1139
1140 // Check if we can lower this to a native operation.
1141
Tom Stellard2add82d2013-03-08 15:37:09 +00001142 // Try to lower to a SET* instruction:
1143 //
1144 // SET* can match the following patterns:
1145 //
Tom Stellardcd428182013-09-28 02:50:38 +00001146 // select_cc f32, f32, -1, 0, cc_supported
1147 // select_cc f32, f32, 1.0f, 0.0f, cc_supported
1148 // select_cc i32, i32, -1, 0, cc_supported
Tom Stellard2add82d2013-03-08 15:37:09 +00001149 //
1150
1151 // Move hardware True/False values to the correct operand.
Tom Stellardcd428182013-09-28 02:50:38 +00001152 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
1153 ISD::CondCode InverseCC =
1154 ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
Tom Stellard5694d302013-09-28 02:50:43 +00001155 if (isHWTrueValue(False) && isHWFalseValue(True)) {
1156 if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
1157 std::swap(False, True);
1158 CC = DAG.getCondCode(InverseCC);
1159 } else {
1160 ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
1161 if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
1162 std::swap(False, True);
1163 std::swap(LHS, RHS);
1164 CC = DAG.getCondCode(SwapInvCC);
1165 }
1166 }
Tom Stellard2add82d2013-03-08 15:37:09 +00001167 }
1168
1169 if (isHWTrueValue(True) && isHWFalseValue(False) &&
1170 (CompareVT == VT || VT == MVT::i32)) {
1171 // This can be matched by a SET* instruction.
1172 return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
1173 }
1174
Tom Stellard75aadc22012-12-11 21:25:42 +00001175 // Try to lower to a CND* instruction:
Tom Stellard2add82d2013-03-08 15:37:09 +00001176 //
1177 // CND* can match the following patterns:
1178 //
Tom Stellardcd428182013-09-28 02:50:38 +00001179 // select_cc f32, 0.0, f32, f32, cc_supported
1180 // select_cc f32, 0.0, i32, i32, cc_supported
1181 // select_cc i32, 0, f32, f32, cc_supported
1182 // select_cc i32, 0, i32, i32, cc_supported
Tom Stellard2add82d2013-03-08 15:37:09 +00001183 //
Tom Stellardcd428182013-09-28 02:50:38 +00001184
1185 // Try to move the zero value to the RHS
1186 if (isZero(LHS)) {
1187 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
1188 // Try swapping the operands
1189 ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
1190 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
1191 std::swap(LHS, RHS);
1192 CC = DAG.getCondCode(CCSwapped);
1193 } else {
1194 // Try inverting the conditon and then swapping the operands
1195 ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
1196 CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
1197 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
1198 std::swap(True, False);
1199 std::swap(LHS, RHS);
1200 CC = DAG.getCondCode(CCSwapped);
1201 }
1202 }
1203 }
1204 if (isZero(RHS)) {
1205 SDValue Cond = LHS;
1206 SDValue Zero = RHS;
Tom Stellard75aadc22012-12-11 21:25:42 +00001207 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
1208 if (CompareVT != VT) {
1209 // Bitcast True / False to the correct types. This will end up being
1210 // a nop, but it allows us to define only a single pattern in the
1211 // .TD files for each CND* instruction rather than having to have
1212 // one pattern for integer True/False and one for fp True/False
1213 True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
1214 False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
1215 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001216
1217 switch (CCOpcode) {
1218 case ISD::SETONE:
1219 case ISD::SETUNE:
1220 case ISD::SETNE:
Tom Stellard75aadc22012-12-11 21:25:42 +00001221 CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
1222 Temp = True;
1223 True = False;
1224 False = Temp;
1225 break;
1226 default:
1227 break;
1228 }
1229 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
1230 Cond, Zero,
1231 True, False,
1232 DAG.getCondCode(CCOpcode));
1233 return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
1234 }
1235
Tom Stellard75aadc22012-12-11 21:25:42 +00001236  // If we make it this far, it means we have no native instructions to handle
1237 // this SELECT_CC, so we must lower it.
1238 SDValue HWTrue, HWFalse;
1239
1240 if (CompareVT == MVT::f32) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001241 HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
1242 HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
Tom Stellard75aadc22012-12-11 21:25:42 +00001243 } else if (CompareVT == MVT::i32) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001244 HWTrue = DAG.getConstant(-1, DL, CompareVT);
1245 HWFalse = DAG.getConstant(0, DL, CompareVT);
Tom Stellard75aadc22012-12-11 21:25:42 +00001246 }
1247 else {
Matt Arsenaulteaa3a7e2013-12-10 21:37:42 +00001248 llvm_unreachable("Unhandled value type in LowerSELECT_CC");
Tom Stellard75aadc22012-12-11 21:25:42 +00001249 }
1250
1251 // Lower this unsupported SELECT_CC into a combination of two supported
1252 // SELECT_CC operations.
1253 SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
1254
1255 return DAG.getNode(ISD::SELECT_CC, DL, VT,
1256 Cond, HWFalse,
1257 True, False,
1258 DAG.getCondCode(ISD::SETNE));
1259}
1260
Alp Tokercb402912014-01-24 17:20:08 +00001261/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001262/// convert these pointers to a register index. Each register holds
 1263/// 16 bytes (4 x 32-bit sub-registers), but we need to take into account the
 1264/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
1265/// for indirect addressing.
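/// For example (illustrative): with \p StackWidth == 2 each stack row spans
/// two 32-bit channels (8 bytes), so a byte pointer becomes a register index
/// by shifting right by 3; widths 1 and 4 shift by 2 and 4 respectively.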
1266SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1267 unsigned StackWidth,
1268 SelectionDAG &DAG) const {
1269 unsigned SRLPad;
1270 switch(StackWidth) {
1271 case 1:
1272 SRLPad = 2;
1273 break;
1274 case 2:
1275 SRLPad = 3;
1276 break;
1277 case 4:
1278 SRLPad = 4;
1279 break;
1280 default: llvm_unreachable("Invalid stack width");
1281 }
1282
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001283 SDLoc DL(Ptr);
1284 return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
1285 DAG.getConstant(SRLPad, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001286}
1287
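// A sketch of the mapping computed below: for StackWidth == 2, element 0 maps
// to channel 0 and element 1 to channel 1 of the current register, while
// element 2 wraps to channel 0 of the next register (PtrIncr == 1).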
1288void R600TargetLowering::getStackAddress(unsigned StackWidth,
1289 unsigned ElemIdx,
1290 unsigned &Channel,
1291 unsigned &PtrIncr) const {
1292 switch (StackWidth) {
1293 default:
1294 case 1:
1295 Channel = 0;
1296 if (ElemIdx > 0) {
1297 PtrIncr = 1;
1298 } else {
1299 PtrIncr = 0;
1300 }
1301 break;
1302 case 2:
1303 Channel = ElemIdx % 2;
1304 if (ElemIdx == 2) {
1305 PtrIncr = 1;
1306 } else {
1307 PtrIncr = 0;
1308 }
1309 break;
1310 case 4:
1311 Channel = ElemIdx;
1312 PtrIncr = 0;
1313 break;
1314 }
1315}
1316
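// Private-address truncating stores have no direct hardware support, so they
// are emulated with a read-modify-write of the containing dword. A sketch of
// the DAG built below:
//   old   = REGISTER_LOAD(ptr >> 2)
//   shift = (ptr & 3) * 8
//   new   = (old & ~(mask << shift)) | ((value & mask) << shift)
//   REGISTER_STORE(ptr >> 2, new)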
Matt Arsenault95245662016-02-11 05:32:46 +00001317SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
1318 SelectionDAG &DAG) const {
1319 SDLoc DL(Store);
Tom Stellard75aadc22012-12-11 21:25:42 +00001320
Matt Arsenault95245662016-02-11 05:32:46 +00001321 unsigned Mask = 0;
1322 if (Store->getMemoryVT() == MVT::i8) {
1323 Mask = 0xff;
1324 } else if (Store->getMemoryVT() == MVT::i16) {
1325 Mask = 0xffff;
1326 }
1327
1328 SDValue Chain = Store->getChain();
1329 SDValue BasePtr = Store->getBasePtr();
1330 EVT MemVT = Store->getMemoryVT();
1331
1332 SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, BasePtr,
1333 DAG.getConstant(2, DL, MVT::i32));
1334 SDValue Dst = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
1335 Chain, Ptr,
1336 DAG.getTargetConstant(0, DL, MVT::i32));
1337
1338 SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, BasePtr,
1339 DAG.getConstant(0x3, DL, MVT::i32));
1340
1341 SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1342 DAG.getConstant(3, DL, MVT::i32));
1343
1344 SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
1345 Store->getValue());
1346
1347 SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);
1348
1349 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
1350 MaskedValue, ShiftAmt);
1351
1352 SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32,
1353 DAG.getConstant(Mask, DL, MVT::i32),
1354 ShiftAmt);
1355 DstMask = DAG.getNode(ISD::XOR, DL, MVT::i32, DstMask,
1356 DAG.getConstant(0xffffffff, DL, MVT::i32));
1357 Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
1358
1359 SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
1360 return DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1361 Chain, Value, Ptr,
1362 DAG.getTargetConstant(0, DL, MVT::i32));
1363}
1364
1365SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1366 if (SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG))
Tom Stellardfbab8272013-08-16 01:12:11 +00001367 return Result;
Tom Stellardfbab8272013-08-16 01:12:11 +00001368
Matt Arsenault95245662016-02-11 05:32:46 +00001369 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1370 unsigned AS = StoreNode->getAddressSpace();
1371 SDValue Value = StoreNode->getValue();
1372 EVT ValueVT = Value.getValueType();
1373
1374 if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
1375 ValueVT.isVector()) {
1376 return SplitVectorStore(Op, DAG);
1377 }
1378
1379 SDLoc DL(Op);
1380 SDValue Chain = StoreNode->getChain();
1381 SDValue Ptr = StoreNode->getBasePtr();
1382
1383 if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001384 if (StoreNode->isTruncatingStore()) {
1385 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001386 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001387 EVT MemVT = StoreNode->getMemoryVT();
1388 SDValue MaskConstant;
1389 if (MemVT == MVT::i8) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001390 MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001391 } else {
1392 assert(MemVT == MVT::i16);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001393 MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001394 }
1395 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001396 DAG.getConstant(2, DL, MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001397 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001398 DAG.getConstant(0x00000003, DL, VT));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001399 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1400 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001401 DAG.getConstant(3, DL, VT));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001402 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1403 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
1404 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1405 // vector instead.
1406 SDValue Src[4] = {
1407 ShiftedValue,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001408 DAG.getConstant(0, DL, MVT::i32),
1409 DAG.getConstant(0, DL, MVT::i32),
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001410 Mask
1411 };
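      // The MSKOR operands pack the shifted value in .x and the write mask in
      // .w; the target store is expected to merge them into the destination
      // dword, roughly dst = (dst & ~mask) | value (sketch of the intent).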
Ahmed Bougacha128f8732016-04-26 21:15:30 +00001412 SDValue Input = DAG.getBuildVector(MVT::v4i32, DL, Src);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001413 SDValue Args[3] = { Chain, Input, DWordAddr };
1414 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
Craig Topper206fcd42014-04-26 19:29:41 +00001415 Op->getVTList(), Args, MemVT,
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001416 StoreNode->getMemOperand());
1417 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
Matt Arsenault95245662016-02-11 05:32:46 +00001418 ValueVT.bitsGE(MVT::i32)) {
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001419 // Convert pointer from byte address to dword address.
1420 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1421 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001422 Ptr, DAG.getConstant(2, DL, MVT::i32)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001423
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001424 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
Matt Arsenaulteaa3a7e2013-12-10 21:37:42 +00001425 llvm_unreachable("Truncated and indexed stores not supported yet");
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001426 } else {
1427 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1428 }
1429 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001430 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001431 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001432
Matt Arsenault95245662016-02-11 05:32:46 +00001433 if (AS != AMDGPUAS::PRIVATE_ADDRESS)
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001434 return SDValue();
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001435
Matt Arsenault95245662016-02-11 05:32:46 +00001436 EVT MemVT = StoreNode->getMemoryVT();
1437 if (MemVT.bitsLT(MVT::i32))
1438 return lowerPrivateTruncStore(StoreNode, DAG);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001439
Ahmed Bougachaf8dfb472016-02-09 22:54:12 +00001440 // Lowering for indirect addressing
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001441 const MachineFunction &MF = DAG.getMachineFunction();
Eric Christopher7792e322015-01-30 23:24:40 +00001442 const AMDGPUFrameLowering *TFL =
1443 static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001444 unsigned StackWidth = TFL->getStackWidth(MF);
1445
1446 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1447
1448 if (ValueVT.isVector()) {
1449 unsigned NumElemVT = ValueVT.getVectorNumElements();
1450 EVT ElemVT = ValueVT.getVectorElementType();
Craig Topper48d114b2014-04-26 18:35:24 +00001451 SmallVector<SDValue, 4> Stores(NumElemVT);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001452
1453 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
 1454                                      "vector width in store");
1455
1456 for (unsigned i = 0; i < NumElemVT; ++i) {
1457 unsigned Channel, PtrIncr;
1458 getStackAddress(StackWidth, i, Channel, PtrIncr);
1459 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001460 DAG.getConstant(PtrIncr, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001461 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001462 Value, DAG.getConstant(i, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001463
1464 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1465 Chain, Elem, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001466 DAG.getTargetConstant(Channel, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001467 }
Craig Topper48d114b2014-04-26 18:35:24 +00001468 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001469 } else {
1470 if (ValueVT == MVT::i8) {
1471 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1472 }
1473 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001474 DAG.getTargetConstant(0, DL, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001475 }
1476
1477 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001478}
1479
Tom Stellard365366f2013-01-23 02:09:06 +00001480// return (512 + (kc_bank << 12))
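// e.g. CONSTANT_BUFFER_2 maps to 512 + 2 * 4096 = 8704.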
1481static int
1482ConstantAddressBlock(unsigned AddressSpace) {
1483 switch (AddressSpace) {
1484 case AMDGPUAS::CONSTANT_BUFFER_0:
1485 return 512;
1486 case AMDGPUAS::CONSTANT_BUFFER_1:
1487 return 512 + 4096;
1488 case AMDGPUAS::CONSTANT_BUFFER_2:
1489 return 512 + 4096 * 2;
1490 case AMDGPUAS::CONSTANT_BUFFER_3:
1491 return 512 + 4096 * 3;
1492 case AMDGPUAS::CONSTANT_BUFFER_4:
1493 return 512 + 4096 * 4;
1494 case AMDGPUAS::CONSTANT_BUFFER_5:
1495 return 512 + 4096 * 5;
1496 case AMDGPUAS::CONSTANT_BUFFER_6:
1497 return 512 + 4096 * 6;
1498 case AMDGPUAS::CONSTANT_BUFFER_7:
1499 return 512 + 4096 * 7;
1500 case AMDGPUAS::CONSTANT_BUFFER_8:
1501 return 512 + 4096 * 8;
1502 case AMDGPUAS::CONSTANT_BUFFER_9:
1503 return 512 + 4096 * 9;
1504 case AMDGPUAS::CONSTANT_BUFFER_10:
1505 return 512 + 4096 * 10;
1506 case AMDGPUAS::CONSTANT_BUFFER_11:
1507 return 512 + 4096 * 11;
1508 case AMDGPUAS::CONSTANT_BUFFER_12:
1509 return 512 + 4096 * 12;
1510 case AMDGPUAS::CONSTANT_BUFFER_13:
1511 return 512 + 4096 * 13;
1512 case AMDGPUAS::CONSTANT_BUFFER_14:
1513 return 512 + 4096 * 14;
1514 case AMDGPUAS::CONSTANT_BUFFER_15:
1515 return 512 + 4096 * 15;
1516 default:
1517 return -1;
1518 }
1519}
1520
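// Sub-dword private loads are emulated in registers: load the containing
// dword, shift the addressed byte/halfword down, then sign- or zero-extend it.
// A sketch of the DAG built below:
//   word  = REGISTER_LOAD(ptr >> 2)
//   shift = (ptr & 3) * 8
//   val   = ext_inreg(word >> shift, MemVT)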
Matt Arsenault6dfda962016-02-10 18:21:39 +00001521SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
1522 SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001523 SDLoc DL(Op);
Matt Arsenault6dfda962016-02-10 18:21:39 +00001524 LoadSDNode *Load = cast<LoadSDNode>(Op);
1525 ISD::LoadExtType ExtType = Load->getExtensionType();
1526 EVT MemVT = Load->getMemoryVT();
Tom Stellard365366f2013-01-23 02:09:06 +00001527
Matt Arsenault6dfda962016-02-10 18:21:39 +00001528  // On targets below SI, a private-address extload of a sub-32-bit type is
 1529  // emulated with a dword register load and a (1- or 2-)byte extract.
1530
1531 // Get Register holding the target.
1532 SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Load->getBasePtr(),
1533 DAG.getConstant(2, DL, MVT::i32));
1534 // Load the Register.
1535 SDValue Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(),
1536 Load->getChain(),
1537 Ptr,
1538 DAG.getTargetConstant(0, DL, MVT::i32),
1539 Op.getOperand(2));
1540
1541 // Get offset within the register.
1542 SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
1543 Load->getBasePtr(),
1544 DAG.getConstant(0x3, DL, MVT::i32));
1545
1546 // Bit offset of target byte (byteIdx * 8).
1547 SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1548 DAG.getConstant(3, DL, MVT::i32));
1549
1550 // Shift to the right.
1551 Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Ret, ShiftAmt);
1552
1553 // Eliminate the upper bits by setting them to ...
1554 EVT MemEltVT = MemVT.getScalarType();
1555
1556 // ... ones.
1557 if (ExtType == ISD::SEXTLOAD) {
1558 SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
1559
1560 SDValue Ops[] = {
1561 DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode),
1562 Load->getChain()
1563 };
1564
1565 return DAG.getMergeValues(Ops, DL);
1566 }
1567
1568 // ... or zeros.
1569 SDValue Ops[] = {
1570 DAG.getZeroExtendInReg(Ret, DL, MemEltVT),
1571 Load->getChain()
1572 };
1573
1574 return DAG.getMergeValues(Ops, DL);
1575}
1576
1577SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1578 LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
1579 unsigned AS = LoadNode->getAddressSpace();
1580 EVT MemVT = LoadNode->getMemoryVT();
1581 ISD::LoadExtType ExtType = LoadNode->getExtensionType();
1582
1583 if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
1584 ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
1585 return lowerPrivateExtLoad(Op, DAG);
1586 }
1587
1588 SDLoc DL(Op);
1589 EVT VT = Op.getValueType();
1590 SDValue Chain = LoadNode->getChain();
1591 SDValue Ptr = LoadNode->getBasePtr();
Tom Stellarde9373602014-01-22 19:24:14 +00001592
Tom Stellard35bb18c2013-08-26 15:06:04 +00001593 if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
1594 SDValue MergedValues[2] = {
Matt Arsenault9c499c32016-04-14 23:31:26 +00001595 scalarizeVectorLoad(LoadNode, DAG),
Tom Stellard35bb18c2013-08-26 15:06:04 +00001596 Chain
1597 };
Craig Topper64941d92014-04-27 19:20:57 +00001598 return DAG.getMergeValues(MergedValues, DL);
Tom Stellard35bb18c2013-08-26 15:06:04 +00001599 }
1600
Tom Stellard365366f2013-01-23 02:09:06 +00001601 int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
Matt Arsenault00a0d6f2013-11-13 02:39:07 +00001602 if (ConstantBlock > -1 &&
1603 ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
1604 (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
Tom Stellard365366f2013-01-23 02:09:06 +00001605 SDValue Result;
Nick Lewyckyaad475b2014-04-15 07:22:52 +00001606 if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
1607 isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
Matt Arsenaultef1a9502013-11-01 17:39:26 +00001608 isa<ConstantSDNode>(Ptr)) {
Tom Stellard365366f2013-01-23 02:09:06 +00001609 SDValue Slots[4];
1610 for (unsigned i = 0; i < 4; i++) {
 1611        // We want the Const position encoded with the following formula:
 1612        //   (((512 + (kc_bank << 12) + const_index) << 2) + chan)
 1613        // const_index is Ptr computed by llvm using an alignment of 16.
 1614        // Thus we add (512 + (kc_bank << 12)) * 16 + chan * 4 here and
 1615        // then divide by 4 at the ISel step.
1616 SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001617 DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
Tom Stellard365366f2013-01-23 02:09:06 +00001618 Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
1619 }
Tom Stellard0344cdf2013-08-01 15:23:42 +00001620 EVT NewVT = MVT::v4i32;
1621 unsigned NumElements = 4;
1622 if (VT.isVector()) {
1623 NewVT = VT;
1624 NumElements = VT.getVectorNumElements();
1625 }
Ahmed Bougacha128f8732016-04-26 21:15:30 +00001626 Result = DAG.getBuildVector(NewVT, DL, makeArrayRef(Slots, NumElements));
Tom Stellard365366f2013-01-23 02:09:06 +00001627 } else {
Alp Tokerf907b892013-12-05 05:44:44 +00001628      // A non-constant ptr can't be folded, so keep it as a v4f32 load
Tom Stellard365366f2013-01-23 02:09:06 +00001629 Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001630 DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
1631 DAG.getConstant(4, DL, MVT::i32)),
1632 DAG.getConstant(LoadNode->getAddressSpace() -
1633 AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
Tom Stellard365366f2013-01-23 02:09:06 +00001634 );
1635 }
1636
1637 if (!VT.isVector()) {
1638 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001639 DAG.getConstant(0, DL, MVT::i32));
Tom Stellard365366f2013-01-23 02:09:06 +00001640 }
1641
1642 SDValue MergedValues[2] = {
Matt Arsenault7939acd2014-04-07 16:44:24 +00001643 Result,
1644 Chain
Tom Stellard365366f2013-01-23 02:09:06 +00001645 };
Craig Topper64941d92014-04-27 19:20:57 +00001646 return DAG.getMergeValues(MergedValues, DL);
Tom Stellard365366f2013-01-23 02:09:06 +00001647 }
1648
Matt Arsenault6dfda962016-02-10 18:21:39 +00001649 SDValue LoweredLoad;
1650
Matt Arsenault909d0c02013-10-30 23:43:29 +00001651 // For most operations returning SDValue() will result in the node being
1652 // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
1653 // need to manually expand loads that may be legal in some address spaces and
1654 // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
1655 // compute shaders, since the data is sign extended when it is uploaded to the
1656 // buffer. However SEXT loads from other address spaces are not supported, so
1657 // we need to expand them here.
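  // The expansion below is simply an any-extending load of MemVT followed by
  // a SIGN_EXTEND_INREG with the original memory type.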
Tom Stellard84021442013-07-23 01:48:24 +00001658 if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
1659 EVT MemVT = LoadNode->getMemoryVT();
1660 assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
Tom Stellard84021442013-07-23 01:48:24 +00001661 SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
1662 LoadNode->getPointerInfo(), MemVT,
1663 LoadNode->isVolatile(),
1664 LoadNode->isNonTemporal(),
Louis Gerbarg67474e32014-07-31 21:45:05 +00001665 LoadNode->isInvariant(),
Tom Stellard84021442013-07-23 01:48:24 +00001666 LoadNode->getAlignment());
Jan Veselyb670d372015-05-26 18:07:22 +00001667 SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
1668 DAG.getValueType(MemVT));
Tom Stellard84021442013-07-23 01:48:24 +00001669
Jan Veselyb670d372015-05-26 18:07:22 +00001670 SDValue MergedValues[2] = { Res, Chain };
Craig Topper64941d92014-04-27 19:20:57 +00001671 return DAG.getMergeValues(MergedValues, DL);
Tom Stellard84021442013-07-23 01:48:24 +00001672 }
1673
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001674 if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1675 return SDValue();
1676 }
1677
1678 // Lowering for indirect addressing
1679 const MachineFunction &MF = DAG.getMachineFunction();
Eric Christopher7792e322015-01-30 23:24:40 +00001680 const AMDGPUFrameLowering *TFL =
1681 static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001682 unsigned StackWidth = TFL->getStackWidth(MF);
1683
1684 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1685
1686 if (VT.isVector()) {
1687 unsigned NumElemVT = VT.getVectorNumElements();
1688 EVT ElemVT = VT.getVectorElementType();
1689 SDValue Loads[4];
1690
Jan Vesely687ca8d2016-05-16 23:56:32 +00001691 assert(NumElemVT <= 4);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001692 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1693 "vector width in load");
1694
1695 for (unsigned i = 0; i < NumElemVT; ++i) {
1696 unsigned Channel, PtrIncr;
1697 getStackAddress(StackWidth, i, Channel, PtrIncr);
1698 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001699 DAG.getConstant(PtrIncr, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001700 Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
1701 Chain, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001702 DAG.getTargetConstant(Channel, DL, MVT::i32),
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001703 Op.getOperand(2));
1704 }
Jan Vesely687ca8d2016-05-16 23:56:32 +00001705 EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElemVT);
1706 LoweredLoad = DAG.getBuildVector(TargetVT, DL, makeArrayRef(Loads, NumElemVT));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001707 } else {
1708 LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
1709 Chain, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001710 DAG.getTargetConstant(0, DL, MVT::i32), // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001711 Op.getOperand(2));
1712 }
1713
Matt Arsenault7939acd2014-04-07 16:44:24 +00001714 SDValue Ops[2] = {
1715 LoweredLoad,
1716 Chain
1717 };
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001718
Craig Topper64941d92014-04-27 19:20:57 +00001719 return DAG.getMergeValues(Ops, DL);
Tom Stellard365366f2013-01-23 02:09:06 +00001720}
Tom Stellard75aadc22012-12-11 21:25:42 +00001721
Matt Arsenault1d555c42014-06-23 18:00:55 +00001722SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1723 SDValue Chain = Op.getOperand(0);
1724 SDValue Cond = Op.getOperand(1);
1725 SDValue Jump = Op.getOperand(2);
1726
1727 return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1728 Chain, Jump, Cond);
1729}
1730
Matt Arsenault81d06012016-03-07 21:10:13 +00001731SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
1732 SelectionDAG &DAG) const {
1733 MachineFunction &MF = DAG.getMachineFunction();
1734 const AMDGPUFrameLowering *TFL = Subtarget->getFrameLowering();
1735
1736 FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
1737
1738 unsigned FrameIndex = FIN->getIndex();
1739 unsigned IgnoredFrameReg;
1740 unsigned Offset =
1741 TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
1742 return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), SDLoc(Op),
1743 Op.getValueType());
1744}
1745
Tom Stellard75aadc22012-12-11 21:25:42 +00001746/// XXX Only kernel functions are supported, so we can assume for now that
1747/// every function is a kernel function, but in the future we should use
1748/// separate calling conventions for kernel and non-kernel functions.
1749SDValue R600TargetLowering::LowerFormalArguments(
1750 SDValue Chain,
1751 CallingConv::ID CallConv,
1752 bool isVarArg,
1753 const SmallVectorImpl<ISD::InputArg> &Ins,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001754 SDLoc DL, SelectionDAG &DAG,
Tom Stellard75aadc22012-12-11 21:25:42 +00001755 SmallVectorImpl<SDValue> &InVals) const {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001756 SmallVector<CCValAssign, 16> ArgLocs;
Eric Christopherb5217502014-08-06 18:45:26 +00001757 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1758 *DAG.getContext());
Vincent Lejeunef143af32013-11-11 22:10:24 +00001759 MachineFunction &MF = DAG.getMachineFunction();
Jan Veselye5121f32014-10-14 20:05:26 +00001760 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellardacfeebf2013-07-23 01:48:05 +00001761
Tom Stellardaf775432013-10-23 00:44:32 +00001762 SmallVector<ISD::InputArg, 8> LocalIns;
1763
Matt Arsenault209a7b92014-04-18 07:40:20 +00001764 getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);
Tom Stellardaf775432013-10-23 00:44:32 +00001765
1766 AnalyzeFormalArguments(CCInfo, LocalIns);
Tom Stellardacfeebf2013-07-23 01:48:05 +00001767
Tom Stellard1e803092013-07-23 01:48:18 +00001768 for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001769 CCValAssign &VA = ArgLocs[i];
Matt Arsenault74ef2772014-08-13 18:14:11 +00001770 const ISD::InputArg &In = Ins[i];
1771 EVT VT = In.VT;
1772 EVT MemVT = VA.getLocVT();
1773 if (!VT.isVector() && MemVT.isVector()) {
1774 // Get load source type if scalarized.
1775 MemVT = MemVT.getVectorElementType();
1776 }
Tom Stellard78e01292013-07-23 01:47:58 +00001777
Nicolai Haehnledf3a20c2016-04-06 19:40:20 +00001778 if (AMDGPU::isShader(CallConv)) {
Vincent Lejeunef143af32013-11-11 22:10:24 +00001779 unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
1780 SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
1781 InVals.push_back(Register);
1782 continue;
1783 }
1784
Tom Stellard75aadc22012-12-11 21:25:42 +00001785 PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Matt Arsenault74ef2772014-08-13 18:14:11 +00001786 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellardacfeebf2013-07-23 01:48:05 +00001787
Matt Arsenaultfae02982014-03-17 18:58:11 +00001788 // i64 isn't a legal type, so the register type used ends up as i32, which
1789 // isn't expected here. It attempts to create this sextload, but it ends up
1790 // being invalid. Somehow this seems to work with i64 arguments, but breaks
1791 // for <1 x i64>.
1792
Tom Stellardacfeebf2013-07-23 01:48:05 +00001793 // The first 36 bytes of the input buffer contains information about
1794 // thread group and global sizes.
Matt Arsenault74ef2772014-08-13 18:14:11 +00001795 ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
1796 if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
1797 // FIXME: This should really check the extload type, but the handling of
1798 // extload vector parameters seems to be broken.
Matt Arsenaulte1f030c2014-04-11 20:59:54 +00001799
Matt Arsenault74ef2772014-08-13 18:14:11 +00001800 // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
1801 Ext = ISD::SEXTLOAD;
1802 }
1803
1804 // Compute the offset from the value.
1805 // XXX - I think PartOffset should give you this, but it seems to give the
1806 // size of the register which isn't useful.
1807
Andrew Trick05938a52015-02-16 18:10:47 +00001808 unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
Matt Arsenault74ef2772014-08-13 18:14:11 +00001809 unsigned PartOffset = VA.getLocMemOffset();
Jan Veselye5121f32014-10-14 20:05:26 +00001810 unsigned Offset = 36 + VA.getLocMemOffset();
Matt Arsenault74ef2772014-08-13 18:14:11 +00001811
1812 MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
1813 SDValue Arg = DAG.getLoad(ISD::UNINDEXED, Ext, VT, DL, Chain,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001814 DAG.getConstant(Offset, DL, MVT::i32),
Matt Arsenault74ef2772014-08-13 18:14:11 +00001815 DAG.getUNDEF(MVT::i32),
1816 PtrInfo,
1817 MemVT, false, true, true, 4);
Matt Arsenault209a7b92014-04-18 07:40:20 +00001818
1819 // 4 is the preferred alignment for the CONSTANT memory space.
Tom Stellard75aadc22012-12-11 21:25:42 +00001820 InVals.push_back(Arg);
Jan Veselye5121f32014-10-14 20:05:26 +00001821 MFI->ABIArgOffset = Offset + MemVT.getStoreSize();
Tom Stellard75aadc22012-12-11 21:25:42 +00001822 }
1823 return Chain;
1824}
1825
Mehdi Amini44ede332015-07-09 02:09:04 +00001826EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1827 EVT VT) const {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001828 if (!VT.isVector())
1829 return MVT::i32;
Tom Stellard75aadc22012-12-11 21:25:42 +00001830 return VT.changeVectorElementTypeToInteger();
1831}
1832
Matt Arsenaultfa67bdb2016-02-22 21:04:16 +00001833bool R600TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
1834 unsigned AddrSpace,
1835 unsigned Align,
1836 bool *IsFast) const {
1837 if (IsFast)
1838 *IsFast = false;
1839
1840 if (!VT.isSimple() || VT == MVT::Other)
1841 return false;
1842
1843 if (VT.bitsLT(MVT::i32))
1844 return false;
1845
1846 // TODO: This is a rough estimate.
1847 if (IsFast)
1848 *IsFast = true;
1849
1850 return VT.bitsGT(MVT::i32) && Align % 4 == 0;
1851}
1852
Matt Arsenault209a7b92014-04-18 07:40:20 +00001853static SDValue CompactSwizzlableVector(
1854 SelectionDAG &DAG, SDValue VectorEntry,
1855 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001856 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1857 assert(RemapSwizzle.empty());
1858 SDValue NewBldVec[4] = {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001859 VectorEntry.getOperand(0),
1860 VectorEntry.getOperand(1),
1861 VectorEntry.getOperand(2),
1862 VectorEntry.getOperand(3)
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001863 };
1864
1865 for (unsigned i = 0; i < 4; i++) {
Sanjay Patel57195842016-03-14 17:28:46 +00001866 if (NewBldVec[i].isUndef())
Vincent Lejeunefa58a5f2013-10-13 17:56:10 +00001867 // We mask write here to teach later passes that the ith element of this
1868 // vector is undef. Thus we can use it to reduce 128 bits reg usage,
1869 // break false dependencies and additionnaly make assembly easier to read.
1870 RemapSwizzle[i] = 7; // SEL_MASK_WRITE
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001871 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1872 if (C->isZero()) {
1873 RemapSwizzle[i] = 4; // SEL_0
1874 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1875 } else if (C->isExactlyValue(1.0)) {
1876 RemapSwizzle[i] = 5; // SEL_1
1877 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1878 }
1879 }
1880
Sanjay Patel57195842016-03-14 17:28:46 +00001881 if (NewBldVec[i].isUndef())
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001882 continue;
1883 for (unsigned j = 0; j < i; j++) {
1884 if (NewBldVec[i] == NewBldVec[j]) {
1885 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1886 RemapSwizzle[i] = j;
1887 break;
1888 }
1889 }
1890 }
1891
Ahmed Bougacha128f8732016-04-26 21:15:30 +00001892 return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
1893 NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001894}
1895
Benjamin Kramer193960c2013-06-11 13:32:25 +00001896static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1897 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001898 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1899 assert(RemapSwizzle.empty());
1900 SDValue NewBldVec[4] = {
1901 VectorEntry.getOperand(0),
1902 VectorEntry.getOperand(1),
1903 VectorEntry.getOperand(2),
1904 VectorEntry.getOperand(3)
1905 };
1906 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001907 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001908 RemapSwizzle[i] = i;
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001909 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1910 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1911 ->getZExtValue();
1912 if (i == Idx)
1913 isUnmovable[Idx] = true;
1914 }
1915 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001916
1917 for (unsigned i = 0; i < 4; i++) {
1918 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1919 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1920 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001921 if (isUnmovable[Idx])
1922 continue;
1923 // Swap i and Idx
1924 std::swap(NewBldVec[Idx], NewBldVec[i]);
1925 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1926 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001927 }
1928 }
1929
Ahmed Bougacha128f8732016-04-26 21:15:30 +00001930 return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
1931 NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001932}
1933
1934
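// Roughly: fold constants, undefs and duplicates of the swizzled build_vector
// into SEL_0 / SEL_1 / mask-write / copy selects (CompactSwizzlableVector),
// then move elements coming from extract_vector_elt back to their original
// lane (ReorganizeVector), remapping the Swz[] operands to match.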
1935SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001936 SDValue Swz[4], SelectionDAG &DAG,
1937 SDLoc DL) const {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001938 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1939 // Old -> New swizzle values
1940 DenseMap<unsigned, unsigned> SwizzleRemap;
1941
1942 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1943 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001944 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001945 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001946 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001947 }
1948
1949 SwizzleRemap.clear();
1950 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1951 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001952 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001953 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001954 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001955 }
1956
1957 return BuildVector;
1958}
1959
1960
Tom Stellard75aadc22012-12-11 21:25:42 +00001961//===----------------------------------------------------------------------===//
1962// Custom DAG Optimizations
1963//===----------------------------------------------------------------------===//
1964
1965SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1966 DAGCombinerInfo &DCI) const {
1967 SelectionDAG &DAG = DCI.DAG;
1968
1969 switch (N->getOpcode()) {
Tom Stellard50122a52014-04-07 19:45:41 +00001970 default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00001971 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1972 case ISD::FP_ROUND: {
1973 SDValue Arg = N->getOperand(0);
1974 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001975 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001976 Arg.getOperand(0));
1977 }
1978 break;
1979 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001980
1981 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1982 // (i32 select_cc f32, f32, -1, 0 cc)
1983 //
1984 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1985 // this to one of the SET*_DX10 instructions.
1986 case ISD::FP_TO_SINT: {
1987 SDValue FNeg = N->getOperand(0);
1988 if (FNeg.getOpcode() != ISD::FNEG) {
1989 return SDValue();
1990 }
1991 SDValue SelectCC = FNeg.getOperand(0);
1992 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1993 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1994 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1995 !isHWTrueValue(SelectCC.getOperand(2)) ||
1996 !isHWFalseValue(SelectCC.getOperand(3))) {
1997 return SDValue();
1998 }
1999
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002000 SDLoc dl(N);
2001 return DAG.getNode(ISD::SELECT_CC, dl, N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00002002 SelectCC.getOperand(0), // LHS
2003 SelectCC.getOperand(1), // RHS
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002004 DAG.getConstant(-1, dl, MVT::i32), // True
2005 DAG.getConstant(0, dl, MVT::i32), // False
Tom Stellarde06163a2013-02-07 14:02:35 +00002006 SelectCC.getOperand(4)); // CC
2007
2008 break;
2009 }
Quentin Colombete2e05482013-07-30 00:27:16 +00002010
NAKAMURA Takumi8a046432013-10-28 04:07:38 +00002011  // insert_vector_elt (build_vector elt0, ... , eltN), NewEltVal, idx
 2012  // => build_vector elt0, ... , NewEltVal, ... , eltN
Quentin Colombete2e05482013-07-30 00:27:16 +00002013 case ISD::INSERT_VECTOR_ELT: {
2014 SDValue InVec = N->getOperand(0);
2015 SDValue InVal = N->getOperand(1);
2016 SDValue EltNo = N->getOperand(2);
2017 SDLoc dl(N);
2018
2019 // If the inserted element is an UNDEF, just use the input vector.
Sanjay Patel57195842016-03-14 17:28:46 +00002020 if (InVal.isUndef())
Quentin Colombete2e05482013-07-30 00:27:16 +00002021 return InVec;
2022
2023 EVT VT = InVec.getValueType();
2024
2025 // If we can't generate a legal BUILD_VECTOR, exit
2026 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
2027 return SDValue();
2028
2029 // Check that we know which element is being inserted
2030 if (!isa<ConstantSDNode>(EltNo))
2031 return SDValue();
2032 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
2033
2034 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
2035 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
2036 // vector elements.
2037 SmallVector<SDValue, 8> Ops;
2038 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
2039 Ops.append(InVec.getNode()->op_begin(),
2040 InVec.getNode()->op_end());
Sanjay Patel57195842016-03-14 17:28:46 +00002041 } else if (InVec.isUndef()) {
Quentin Colombete2e05482013-07-30 00:27:16 +00002042 unsigned NElts = VT.getVectorNumElements();
2043 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
2044 } else {
2045 return SDValue();
2046 }
2047
2048 // Insert the element
2049 if (Elt < Ops.size()) {
2050 // All the operands of BUILD_VECTOR must have the same type;
2051 // we enforce that here.
2052 EVT OpVT = Ops[0].getValueType();
2053 if (InVal.getValueType() != OpVT)
2054 InVal = OpVT.bitsGT(InVal.getValueType()) ?
2055 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
2056 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
2057 Ops[Elt] = InVal;
2058 }
2059
2060 // Return the new vector
Ahmed Bougacha128f8732016-04-26 21:15:30 +00002061 return DAG.getBuildVector(VT, dl, Ops);
Quentin Colombete2e05482013-07-30 00:27:16 +00002062 }
2063
Tom Stellard365366f2013-01-23 02:09:06 +00002064 // Extract_vec (Build_vector) generated by custom lowering
 2065  // also needs to be custom combined
2066 case ISD::EXTRACT_VECTOR_ELT: {
2067 SDValue Arg = N->getOperand(0);
2068 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
2069 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2070 unsigned Element = Const->getZExtValue();
2071 return Arg->getOperand(Element);
2072 }
2073 }
Tom Stellarddd04c832013-01-31 22:11:53 +00002074 if (Arg.getOpcode() == ISD::BITCAST &&
2075 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
2076 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2077 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00002078 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00002079 Arg->getOperand(0).getOperand(Element));
2080 }
2081 }
Mehdi Aminie029eae2015-07-16 06:23:12 +00002082 break;
Tom Stellard365366f2013-01-23 02:09:06 +00002083 }
Tom Stellarde06163a2013-02-07 14:02:35 +00002084
2085 case ISD::SELECT_CC: {
Tom Stellardafa8b532014-05-09 16:42:16 +00002086 // Try common optimizations
Ahmed Bougachaf8dfb472016-02-09 22:54:12 +00002087 if (SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI))
Tom Stellardafa8b532014-05-09 16:42:16 +00002088 return Ret;
2089
Tom Stellarde06163a2013-02-07 14:02:35 +00002090 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
2091 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00002092 //
2093 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
2094 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00002095 SDValue LHS = N->getOperand(0);
2096 if (LHS.getOpcode() != ISD::SELECT_CC) {
2097 return SDValue();
2098 }
2099
2100 SDValue RHS = N->getOperand(1);
2101 SDValue True = N->getOperand(2);
2102 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00002103 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00002104
2105 if (LHS.getOperand(2).getNode() != True.getNode() ||
2106 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00002107 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00002108 return SDValue();
2109 }
2110
Tom Stellard5e524892013-03-08 15:37:11 +00002111 switch (NCC) {
2112 default: return SDValue();
2113 case ISD::SETNE: return LHS;
2114 case ISD::SETEQ: {
2115 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
2116 LHSCC = ISD::getSetCCInverse(LHSCC,
2117 LHS.getOperand(0).getValueType().isInteger());
Tom Stellardcd428182013-09-28 02:50:38 +00002118 if (DCI.isBeforeLegalizeOps() ||
2119 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
2120 return DAG.getSelectCC(SDLoc(N),
2121 LHS.getOperand(0),
2122 LHS.getOperand(1),
2123 LHS.getOperand(2),
2124 LHS.getOperand(3),
2125 LHSCC);
2126 break;
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002127 }
Tom Stellard5e524892013-03-08 15:37:11 +00002128 }
Tom Stellardcd428182013-09-28 02:50:38 +00002129 return SDValue();
Tom Stellard5e524892013-03-08 15:37:11 +00002130 }
Tom Stellardfbab8272013-08-16 01:12:11 +00002131
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002132 case AMDGPUISD::EXPORT: {
2133 SDValue Arg = N->getOperand(1);
2134 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2135 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002136
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002137 SDValue NewArgs[8] = {
2138 N->getOperand(0), // Chain
2139 SDValue(),
2140 N->getOperand(2), // ArrayBase
2141 N->getOperand(3), // Type
2142 N->getOperand(4), // SWZ_X
2143 N->getOperand(5), // SWZ_Y
2144 N->getOperand(6), // SWZ_Z
2145 N->getOperand(7) // SWZ_W
2146 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00002147 SDLoc DL(N);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002148 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
Craig Topper48d114b2014-04-26 18:35:24 +00002149 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
Tom Stellarde06163a2013-02-07 14:02:35 +00002150 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002151 case AMDGPUISD::TEXTURE_FETCH: {
2152 SDValue Arg = N->getOperand(1);
2153 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2154 break;
2155
2156 SDValue NewArgs[19] = {
2157 N->getOperand(0),
2158 N->getOperand(1),
2159 N->getOperand(2),
2160 N->getOperand(3),
2161 N->getOperand(4),
2162 N->getOperand(5),
2163 N->getOperand(6),
2164 N->getOperand(7),
2165 N->getOperand(8),
2166 N->getOperand(9),
2167 N->getOperand(10),
2168 N->getOperand(11),
2169 N->getOperand(12),
2170 N->getOperand(13),
2171 N->getOperand(14),
2172 N->getOperand(15),
2173 N->getOperand(16),
2174 N->getOperand(17),
2175 N->getOperand(18),
2176 };
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002177 SDLoc DL(N);
2178 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
2179 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002180 }
Tom Stellard75aadc22012-12-11 21:25:42 +00002181 }
Matt Arsenault5565f65e2014-05-22 18:09:07 +00002182
2183 return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00002184}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002185
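// Tries to fold the node feeding source operand \p Src of \p ParentNode into
// the instruction's source-modifier operands: FNEG/FABS set the neg/abs bits,
// CONST_COPY becomes a constant-buffer select (subject to the constant read
// limits), and MOV_IMM_* becomes an inline constant register or the literal
// operand. Returns true and updates Src/Neg/Abs/Sel/Imm on success.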
2186static bool
2187FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002188 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Eric Christopherfc6de422014-08-05 02:39:49 +00002189 const R600InstrInfo *TII =
2190 static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002191 if (!Src.isMachineOpcode())
2192 return false;
2193 switch (Src.getMachineOpcode()) {
2194 case AMDGPU::FNEG_R600:
2195 if (!Neg.getNode())
2196 return false;
2197 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002198 Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002199 return true;
2200 case AMDGPU::FABS_R600:
2201 if (!Abs.getNode())
2202 return false;
2203 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002204 Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002205 return true;
2206 case AMDGPU::CONST_COPY: {
2207 unsigned Opcode = ParentNode->getMachineOpcode();
2208 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2209
2210 if (!Sel.getNode())
2211 return false;
2212
2213 SDValue CstOffset = Src.getOperand(0);
2214 if (ParentNode->getValueType(0).isVector())
2215 return false;
2216
 2217    // Gather constant values
2218 int SrcIndices[] = {
2219 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2220 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2221 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
2222 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2223 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2224 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2225 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2226 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2227 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2228 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2229 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
2230 };
2231 std::vector<unsigned> Consts;
Matt Arsenault4d64f962014-05-12 19:23:21 +00002232 for (int OtherSrcIdx : SrcIndices) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002233 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2234 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2235 continue;
2236 if (HasDst) {
2237 OtherSrcIdx--;
2238 OtherSelIdx--;
2239 }
2240 if (RegisterSDNode *Reg =
2241 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2242 if (Reg->getReg() == AMDGPU::ALU_CONST) {
Matt Arsenaultb3ee3882014-05-12 19:26:38 +00002243 ConstantSDNode *Cst
2244 = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002245 Consts.push_back(Cst->getZExtValue());
2246 }
2247 }
2248 }
2249
Matt Arsenault37c12d72014-05-12 20:42:57 +00002250 ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002251 Consts.push_back(Cst->getZExtValue());
2252 if (!TII->fitsConstReadLimitations(Consts)) {
2253 return false;
2254 }
2255
2256 Sel = CstOffset;
2257 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
2258 return true;
2259 }
Jan Vesely16800392016-05-13 20:39:31 +00002260 case AMDGPU::MOV_IMM_GLOBAL_ADDR:
2261 // Check if the Imm slot is used. Taken from below.
2262 if (cast<ConstantSDNode>(Imm)->getZExtValue())
2263 return false;
2264 Imm = Src.getOperand(0);
2265 Src = DAG.getRegister(AMDGPU::ALU_LITERAL_X, MVT::i32);
2266 return true;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002267 case AMDGPU::MOV_IMM_I32:
2268 case AMDGPU::MOV_IMM_F32: {
2269 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
2270 uint64_t ImmValue = 0;
2271
2272
2273 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
2274 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
2275 float FloatValue = FPC->getValueAPF().convertToFloat();
2276 if (FloatValue == 0.0) {
2277 ImmReg = AMDGPU::ZERO;
2278 } else if (FloatValue == 0.5) {
2279 ImmReg = AMDGPU::HALF;
2280 } else if (FloatValue == 1.0) {
2281 ImmReg = AMDGPU::ONE;
2282 } else {
2283 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2284 }
2285 } else {
2286 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
2287 uint64_t Value = C->getZExtValue();
2288 if (Value == 0) {
2289 ImmReg = AMDGPU::ZERO;
2290 } else if (Value == 1) {
2291 ImmReg = AMDGPU::ONE_INT;
2292 } else {
2293 ImmValue = Value;
2294 }
2295 }
2296
2297 // Check that we aren't already using an immediate.
2298 // XXX: It's possible for an instruction to have more than one
2299 // immediate operand, but this is not supported yet.
2300 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
2301 if (!Imm.getNode())
2302 return false;
2303 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
2304 assert(C);
2305 if (C->getZExtValue())
2306 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002307 Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002308 }
2309 Src = DAG.getRegister(ImmReg, MVT::i32);
2310 return true;
2311 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002312 default:
2313 return false;
2314 }
2315}
2316
2317
2318/// \brief Fold the instructions after selecting them
2319SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2320 SelectionDAG &DAG) const {
Eric Christopherfc6de422014-08-05 02:39:49 +00002321 const R600InstrInfo *TII =
2322 static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002323 if (!Node->isMachineOpcode())
2324 return Node;
2325 unsigned Opcode = Node->getMachineOpcode();
2326 SDValue FakeOp;
2327
Benjamin Kramer6cd780f2015-02-17 15:29:18 +00002328 std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002329
2330 if (Opcode == AMDGPU::DOT_4) {
2331 int OperandIdx[] = {
2332 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2333 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2334 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2335 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2336 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2337 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2338 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2339 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00002340 };
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002341 int NegIdx[] = {
2342 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
2343 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
2344 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
2345 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
2346 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
2347 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
2348 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
2349 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
2350 };
2351 int AbsIdx[] = {
2352 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
2353 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
2354 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
2355 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
2356 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
2357 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
2358 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
2359 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
2360 };
2361 for (unsigned i = 0; i < 8; i++) {
2362 if (OperandIdx[i] < 0)
2363 return Node;
2364 SDValue &Src = Ops[OperandIdx[i] - 1];
2365 SDValue &Neg = Ops[NegIdx[i] - 1];
2366 SDValue &Abs = Ops[AbsIdx[i] - 1];
2367 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2368 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2369 if (HasDst)
2370 SelIdx--;
2371 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002372 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2373 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2374 }
2375 } else if (Opcode == AMDGPU::REG_SEQUENCE) {
2376 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2377 SDValue &Src = Ops[i];
2378 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002379 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2380 }
Vincent Lejeune0167a312013-09-12 23:45:00 +00002381 } else if (Opcode == AMDGPU::CLAMP_R600) {
2382 SDValue Src = Node->getOperand(0);
2383 if (!Src.isMachineOpcode() ||
2384 !TII->hasInstrModifiers(Src.getMachineOpcode()))
2385 return Node;
2386 int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
2387 AMDGPU::OpName::clamp);
2388 if (ClampIdx < 0)
2389 return Node;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002390 SDLoc DL(Node);
Benjamin Kramer6cd780f2015-02-17 15:29:18 +00002391 std::vector<SDValue> Ops(Src->op_begin(), Src->op_end());
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002392 Ops[ClampIdx - 1] = DAG.getTargetConstant(1, DL, MVT::i32);
2393 return DAG.getMachineNode(Src.getMachineOpcode(), DL,
2394 Node->getVTList(), Ops);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002395 } else {
2396 if (!TII->hasInstrModifiers(Opcode))
2397 return Node;
2398 int OperandIdx[] = {
2399 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2400 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2401 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
2402 };
2403 int NegIdx[] = {
2404 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
2405 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
2406 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
2407 };
2408 int AbsIdx[] = {
2409 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
2410 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
2411 -1
2412 };
2413 for (unsigned i = 0; i < 3; i++) {
2414 if (OperandIdx[i] < 0)
2415 return Node;
2416 SDValue &Src = Ops[OperandIdx[i] - 1];
2417 SDValue &Neg = Ops[NegIdx[i] - 1];
2418 SDValue FakeAbs;
2419 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2420 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2421 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002422 int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
2423 if (HasDst) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002424 SelIdx--;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002425 ImmIdx--;
2426 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002427 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002428 SDValue &Imm = Ops[ImmIdx];
2429 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002430 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2431 }
2432 }
2433
2434 return Node;
2435}