blob: 3f61f9375858388d31c27f8d30344ee8216fe268 [file] [log] [blame]
//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000016#include "AMDGPUFrameLowering.h"
Matt Arsenaultc791f392014-06-23 18:00:31 +000017#include "AMDGPUIntrinsicInfo.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000018#include "AMDGPUSubtarget.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000019#include "R600Defines.h"
20#include "R600InstrInfo.h"
21#include "R600MachineFunctionInfo.h"
Tom Stellard067c8152014-07-21 14:01:14 +000022#include "llvm/Analysis/ValueTracking.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000023#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000024#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000025#include "llvm/CodeGen/MachineInstrBuilder.h"
26#include "llvm/CodeGen/MachineRegisterInfo.h"
27#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000028#include "llvm/IR/Argument.h"
29#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000030
31using namespace llvm;
32
Eric Christopher7792e322015-01-30 23:24:40 +000033R600TargetLowering::R600TargetLowering(TargetMachine &TM,
34 const AMDGPUSubtarget &STI)
35 : AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000036 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard75aadc22012-12-11 21:25:42 +000037 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000038 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
39 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
Matt Arsenault71e66762016-05-21 02:27:49 +000040 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
41 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000042
Eric Christopher23a3a7c2015-02-26 00:00:24 +000043 computeRegisterProperties(STI.getRegisterInfo());
Tom Stellard75aadc22012-12-11 21:25:42 +000044
Matt Arsenault71e66762016-05-21 02:27:49 +000045 // Legalize loads and stores to the private address space.
46 setOperationAction(ISD::LOAD, MVT::i32, Custom);
47 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
48 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
49
50 // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
51 // spaces, so it is custom lowered to handle those where it isn't.
52 for (MVT VT : MVT::integer_valuetypes()) {
53 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
54 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
55 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);
56
57 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
58 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
59 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);
60
61 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
62 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
63 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
64 }
65
66 setOperationAction(ISD::STORE, MVT::i8, Custom);
67 setOperationAction(ISD::STORE, MVT::i32, Custom);
68 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
69 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
70
71 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
72 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
73
Tom Stellard0351ea22013-09-28 02:50:50 +000074 // Set condition code actions
75 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
76 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000077 setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000078 setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000079 setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
80 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000081 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
82 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
83 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
84 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000085 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
86 setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
87
88 setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
89 setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
90 setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
91 setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
92
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000093 setOperationAction(ISD::FCOS, MVT::f32, Custom);
94 setOperationAction(ISD::FSIN, MVT::f32, Custom);
95
Tom Stellard75aadc22012-12-11 21:25:42 +000096 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000097 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000098
Tom Stellard492ebea2013-03-08 15:37:07 +000099 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
100 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Matt Arsenault1d555c42014-06-23 18:00:55 +0000101 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000102
103 setOperationAction(ISD::FSUB, MVT::f32, Expand);
104
Tom Stellard75aadc22012-12-11 21:25:42 +0000105 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
106 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
107
Tom Stellarde8f9f282013-03-08 15:37:05 +0000108 setOperationAction(ISD::SETCC, MVT::i32, Expand);
109 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +0000110 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
Jan Vesely2cb62ce2014-07-10 22:40:21 +0000111 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
112 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000113
Tom Stellard53f2f902013-09-05 18:38:03 +0000114 setOperationAction(ISD::SELECT, MVT::i32, Expand);
115 setOperationAction(ISD::SELECT, MVT::f32, Expand);
116 setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
Tom Stellard53f2f902013-09-05 18:38:03 +0000117 setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +0000118
Jan Vesely808fff52015-04-30 17:15:56 +0000119 // ADD, SUB overflow.
120 // TODO: turn these into Legal?
121 if (Subtarget->hasCARRY())
122 setOperationAction(ISD::UADDO, MVT::i32, Custom);
123
124 if (Subtarget->hasBORROW())
125 setOperationAction(ISD::USUBO, MVT::i32, Custom);
126
Matt Arsenault4e466652014-04-16 01:41:30 +0000127 // Expand sign extension of vectors
128 if (!Subtarget->hasBFE())
129 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
130
131 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
132 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);
133
134 if (!Subtarget->hasBFE())
135 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
136 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
137 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);
138
139 if (!Subtarget->hasBFE())
140 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
141 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
142 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);
143
144 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
145 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
146 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);
147
148 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
149
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000150 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
151
Tom Stellard880a80a2014-06-17 16:53:14 +0000152 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
153 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
154 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
155 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
156
157 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
158 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
159 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
160 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
161
Jan Vesely25f36272014-06-18 12:27:13 +0000162 // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
163 // to be Legal/Custom in order to avoid library calls.
164 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
Jan Vesely900ff2e2014-06-18 12:27:15 +0000165 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
Jan Veselyecf51332014-06-18 12:27:17 +0000166 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
Jan Vesely25f36272014-06-18 12:27:13 +0000167
Michel Danzer49812b52013-07-10 16:37:07 +0000168 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
169
Matt Arsenaultc4d3d3a2014-06-23 18:00:49 +0000170 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
171 for (MVT VT : ScalarIntVTs) {
172 setOperationAction(ISD::ADDC, VT, Expand);
173 setOperationAction(ISD::SUBC, VT, Expand);
174 setOperationAction(ISD::ADDE, VT, Expand);
175 setOperationAction(ISD::SUBE, VT, Expand);
176 }
177
Tom Stellardfc455472013-08-12 22:33:21 +0000178 setSchedulingPreference(Sched::Source);
Matt Arsenault71e66762016-05-21 02:27:49 +0000179
180
181 setTargetDAGCombine(ISD::FP_ROUND);
182 setTargetDAGCombine(ISD::FP_TO_SINT);
183 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
184 setTargetDAGCombine(ISD::SELECT_CC);
185 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000186}
187
Tom Stellardc0f0fba2015-10-01 17:51:29 +0000188static inline bool isEOP(MachineBasicBlock::iterator I) {
189 return std::next(I)->getOpcode() == AMDGPU::RETURN;
190}
191
Tom Stellard75aadc22012-12-11 21:25:42 +0000192MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
193 MachineInstr * MI, MachineBasicBlock * BB) const {
194 MachineFunction * MF = BB->getParent();
195 MachineRegisterInfo &MRI = MF->getRegInfo();
196 MachineBasicBlock::iterator I = *MI;
Eric Christopherfc6de422014-08-05 02:39:49 +0000197 const R600InstrInfo *TII =
Eric Christopher7792e322015-01-30 23:24:40 +0000198 static_cast<const R600InstrInfo *>(Subtarget->getInstrInfo());
Tom Stellard75aadc22012-12-11 21:25:42 +0000199
200 switch (MI->getOpcode()) {
Tom Stellardc6f4a292013-08-26 15:05:59 +0000201 default:
Tom Stellard8f9fc202013-11-15 00:12:45 +0000202 // Replace LDS_*_RET instruction that don't have any uses with the
203 // equivalent LDS_*_NORET instruction.
204 if (TII->isLDSRetInstr(MI->getOpcode())) {
Tom Stellard13c68ef2013-09-05 18:38:09 +0000205 int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
206 assert(DstIdx != -1);
207 MachineInstrBuilder NewMI;
Aaron Watry1885e532014-09-11 15:02:54 +0000208 // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
209 // LDS_1A2D support and remove this special case.
210 if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()) ||
211 MI->getOpcode() == AMDGPU::LDS_CMPST_RET)
Tom Stellard8f9fc202013-11-15 00:12:45 +0000212 return BB;
213
214 NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
215 TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
Tom Stellardc6f4a292013-08-26 15:05:59 +0000216 for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
217 NewMI.addOperand(MI->getOperand(i));
218 }
Tom Stellardc6f4a292013-08-26 15:05:59 +0000219 } else {
220 return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
221 }
222 break;
Tom Stellard75aadc22012-12-11 21:25:42 +0000223 case AMDGPU::CLAMP_R600: {
224 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
225 AMDGPU::MOV,
226 MI->getOperand(0).getReg(),
227 MI->getOperand(1).getReg());
228 TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
229 break;
230 }
231
232 case AMDGPU::FABS_R600: {
233 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
234 AMDGPU::MOV,
235 MI->getOperand(0).getReg(),
236 MI->getOperand(1).getReg());
237 TII->addFlag(NewMI, 0, MO_FLAG_ABS);
238 break;
239 }
240
241 case AMDGPU::FNEG_R600: {
242 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
243 AMDGPU::MOV,
244 MI->getOperand(0).getReg(),
245 MI->getOperand(1).getReg());
246 TII->addFlag(NewMI, 0, MO_FLAG_NEG);
247 break;
248 }
249
Tom Stellard75aadc22012-12-11 21:25:42 +0000250 case AMDGPU::MASK_WRITE: {
251 unsigned maskedRegister = MI->getOperand(0).getReg();
252 assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
253 MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
254 TII->addFlag(defInstr, 0, MO_FLAG_MASK);
255 break;
256 }
257
258 case AMDGPU::MOV_IMM_F32:
259 TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
260 MI->getOperand(1).getFPImm()->getValueAPF()
261 .bitcastToAPInt().getZExtValue());
262 break;
263 case AMDGPU::MOV_IMM_I32:
264 TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
265 MI->getOperand(1).getImm());
266 break;
Jan Veselyf97de002016-05-13 20:39:29 +0000267 case AMDGPU::MOV_IMM_GLOBAL_ADDR: {
268 //TODO: Perhaps combine this instruction with the next if possible
269 auto MIB = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
270 MI->getOperand(0).getReg(),
271 AMDGPU::ALU_LITERAL_X);
272 int Idx = TII->getOperandIdx(*MIB, AMDGPU::OpName::literal);
273 //TODO: Ugh this is rather ugly
274 MIB->getOperand(Idx) = MI->getOperand(1);
275 break;
276 }
Vincent Lejeune0b72f102013-03-05 15:04:55 +0000277 case AMDGPU::CONST_COPY: {
278 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
279 MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
Tom Stellard02661d92013-06-25 21:22:18 +0000280 TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
Vincent Lejeune0b72f102013-03-05 15:04:55 +0000281 MI->getOperand(1).getImm());
282 break;
283 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000284
285 case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
Tom Stellard0344cdf2013-08-01 15:23:42 +0000286 case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
Tom Stellard75aadc22012-12-11 21:25:42 +0000287 case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000288 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
289 .addOperand(MI->getOperand(0))
290 .addOperand(MI->getOperand(1))
Tom Stellardc0f0fba2015-10-01 17:51:29 +0000291 .addImm(isEOP(I)); // Set End of program bit
Tom Stellard75aadc22012-12-11 21:25:42 +0000292 break;
293 }
Tom Stellarde0e582c2015-10-01 17:51:34 +0000294 case AMDGPU::RAT_STORE_TYPED_eg: {
295 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
296 .addOperand(MI->getOperand(0))
297 .addOperand(MI->getOperand(1))
298 .addOperand(MI->getOperand(2))
299 .addImm(isEOP(I)); // Set End of program bit
300 break;
301 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000302
Tom Stellard75aadc22012-12-11 21:25:42 +0000303 case AMDGPU::TXD: {
304 unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
305 unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000306 MachineOperand &RID = MI->getOperand(4);
307 MachineOperand &SID = MI->getOperand(5);
308 unsigned TextureId = MI->getOperand(6).getImm();
309 unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
310 unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
Tom Stellard75aadc22012-12-11 21:25:42 +0000311
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000312 switch (TextureId) {
313 case 5: // Rect
314 CTX = CTY = 0;
315 break;
316 case 6: // Shadow1D
317 SrcW = SrcZ;
318 break;
319 case 7: // Shadow2D
320 SrcW = SrcZ;
321 break;
322 case 8: // ShadowRect
323 CTX = CTY = 0;
324 SrcW = SrcZ;
325 break;
326 case 9: // 1DArray
327 SrcZ = SrcY;
328 CTZ = 0;
329 break;
330 case 10: // 2DArray
331 CTZ = 0;
332 break;
333 case 11: // Shadow1DArray
334 SrcZ = SrcY;
335 CTZ = 0;
336 break;
337 case 12: // Shadow2DArray
338 CTZ = 0;
339 break;
340 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000341 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
342 .addOperand(MI->getOperand(3))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000343 .addImm(SrcX)
344 .addImm(SrcY)
345 .addImm(SrcZ)
346 .addImm(SrcW)
347 .addImm(0)
348 .addImm(0)
349 .addImm(0)
350 .addImm(0)
351 .addImm(1)
352 .addImm(2)
353 .addImm(3)
354 .addOperand(RID)
355 .addOperand(SID)
356 .addImm(CTX)
357 .addImm(CTY)
358 .addImm(CTZ)
359 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000360 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
361 .addOperand(MI->getOperand(2))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000362 .addImm(SrcX)
363 .addImm(SrcY)
364 .addImm(SrcZ)
365 .addImm(SrcW)
366 .addImm(0)
367 .addImm(0)
368 .addImm(0)
369 .addImm(0)
370 .addImm(1)
371 .addImm(2)
372 .addImm(3)
373 .addOperand(RID)
374 .addOperand(SID)
375 .addImm(CTX)
376 .addImm(CTY)
377 .addImm(CTZ)
378 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000379 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
380 .addOperand(MI->getOperand(0))
381 .addOperand(MI->getOperand(1))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000382 .addImm(SrcX)
383 .addImm(SrcY)
384 .addImm(SrcZ)
385 .addImm(SrcW)
386 .addImm(0)
387 .addImm(0)
388 .addImm(0)
389 .addImm(0)
390 .addImm(1)
391 .addImm(2)
392 .addImm(3)
393 .addOperand(RID)
394 .addOperand(SID)
395 .addImm(CTX)
396 .addImm(CTY)
397 .addImm(CTZ)
398 .addImm(CTW)
Tom Stellard75aadc22012-12-11 21:25:42 +0000399 .addReg(T0, RegState::Implicit)
400 .addReg(T1, RegState::Implicit);
401 break;
402 }
403
404 case AMDGPU::TXD_SHADOW: {
405 unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
406 unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000407 MachineOperand &RID = MI->getOperand(4);
408 MachineOperand &SID = MI->getOperand(5);
409 unsigned TextureId = MI->getOperand(6).getImm();
410 unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
411 unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
412
413 switch (TextureId) {
414 case 5: // Rect
415 CTX = CTY = 0;
416 break;
417 case 6: // Shadow1D
418 SrcW = SrcZ;
419 break;
420 case 7: // Shadow2D
421 SrcW = SrcZ;
422 break;
423 case 8: // ShadowRect
424 CTX = CTY = 0;
425 SrcW = SrcZ;
426 break;
427 case 9: // 1DArray
428 SrcZ = SrcY;
429 CTZ = 0;
430 break;
431 case 10: // 2DArray
432 CTZ = 0;
433 break;
434 case 11: // Shadow1DArray
435 SrcZ = SrcY;
436 CTZ = 0;
437 break;
438 case 12: // Shadow2DArray
439 CTZ = 0;
440 break;
441 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000442
443 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
444 .addOperand(MI->getOperand(3))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000445 .addImm(SrcX)
446 .addImm(SrcY)
447 .addImm(SrcZ)
448 .addImm(SrcW)
449 .addImm(0)
450 .addImm(0)
451 .addImm(0)
452 .addImm(0)
453 .addImm(1)
454 .addImm(2)
455 .addImm(3)
456 .addOperand(RID)
457 .addOperand(SID)
458 .addImm(CTX)
459 .addImm(CTY)
460 .addImm(CTZ)
461 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000462 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
463 .addOperand(MI->getOperand(2))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000464 .addImm(SrcX)
465 .addImm(SrcY)
466 .addImm(SrcZ)
467 .addImm(SrcW)
468 .addImm(0)
469 .addImm(0)
470 .addImm(0)
471 .addImm(0)
472 .addImm(1)
473 .addImm(2)
474 .addImm(3)
475 .addOperand(RID)
476 .addOperand(SID)
477 .addImm(CTX)
478 .addImm(CTY)
479 .addImm(CTZ)
480 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000481 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
482 .addOperand(MI->getOperand(0))
483 .addOperand(MI->getOperand(1))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000484 .addImm(SrcX)
485 .addImm(SrcY)
486 .addImm(SrcZ)
487 .addImm(SrcW)
488 .addImm(0)
489 .addImm(0)
490 .addImm(0)
491 .addImm(0)
492 .addImm(1)
493 .addImm(2)
494 .addImm(3)
495 .addOperand(RID)
496 .addOperand(SID)
497 .addImm(CTX)
498 .addImm(CTY)
499 .addImm(CTZ)
500 .addImm(CTW)
Tom Stellard75aadc22012-12-11 21:25:42 +0000501 .addReg(T0, RegState::Implicit)
502 .addReg(T1, RegState::Implicit);
503 break;
504 }
505
506 case AMDGPU::BRANCH:
507 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000508 .addOperand(MI->getOperand(0));
Tom Stellard75aadc22012-12-11 21:25:42 +0000509 break;
510
511 case AMDGPU::BRANCH_COND_f32: {
512 MachineInstr *NewMI =
513 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
514 AMDGPU::PREDICATE_BIT)
515 .addOperand(MI->getOperand(1))
516 .addImm(OPCODE_IS_NOT_ZERO)
517 .addImm(0); // Flags
518 TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000519 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
Tom Stellard75aadc22012-12-11 21:25:42 +0000520 .addOperand(MI->getOperand(0))
521 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
522 break;
523 }
524
525 case AMDGPU::BRANCH_COND_i32: {
526 MachineInstr *NewMI =
527 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
528 AMDGPU::PREDICATE_BIT)
529 .addOperand(MI->getOperand(1))
530 .addImm(OPCODE_IS_NOT_ZERO_INT)
531 .addImm(0); // Flags
532 TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000533 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
Tom Stellard75aadc22012-12-11 21:25:42 +0000534 .addOperand(MI->getOperand(0))
535 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
536 break;
537 }
538
Tom Stellard75aadc22012-12-11 21:25:42 +0000539 case AMDGPU::EG_ExportSwz:
540 case AMDGPU::R600_ExportSwz: {
Tom Stellard6f1b8652013-01-23 21:39:49 +0000541 // Instruction is left unmodified if its not the last one of its type
542 bool isLastInstructionOfItsType = true;
543 unsigned InstExportType = MI->getOperand(1).getImm();
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +0000544 for (MachineBasicBlock::iterator NextExportInst = std::next(I),
Tom Stellard6f1b8652013-01-23 21:39:49 +0000545 EndBlock = BB->end(); NextExportInst != EndBlock;
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +0000546 NextExportInst = std::next(NextExportInst)) {
Tom Stellard6f1b8652013-01-23 21:39:49 +0000547 if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
548 NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
549 unsigned CurrentInstExportType = NextExportInst->getOperand(1)
550 .getImm();
551 if (CurrentInstExportType == InstExportType) {
552 isLastInstructionOfItsType = false;
553 break;
554 }
555 }
556 }
Tom Stellardc0f0fba2015-10-01 17:51:29 +0000557 bool EOP = isEOP(I);
Tom Stellard6f1b8652013-01-23 21:39:49 +0000558 if (!EOP && !isLastInstructionOfItsType)
Tom Stellard75aadc22012-12-11 21:25:42 +0000559 return BB;
560 unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
561 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
562 .addOperand(MI->getOperand(0))
563 .addOperand(MI->getOperand(1))
564 .addOperand(MI->getOperand(2))
565 .addOperand(MI->getOperand(3))
566 .addOperand(MI->getOperand(4))
567 .addOperand(MI->getOperand(5))
568 .addOperand(MI->getOperand(6))
569 .addImm(CfInst)
Tom Stellard6f1b8652013-01-23 21:39:49 +0000570 .addImm(EOP);
Tom Stellard75aadc22012-12-11 21:25:42 +0000571 break;
572 }
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000573 case AMDGPU::RETURN: {
574 // RETURN instructions must have the live-out registers as implicit uses,
575 // otherwise they appear dead.
576 R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
577 MachineInstrBuilder MIB(*MF, MI);
578 for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
579 MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
580 return BB;
581 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000582 }
583
584 MI->eraseFromParent();
585 return BB;
586}
587
588//===----------------------------------------------------------------------===//
589// Custom DAG Lowering Operations
590//===----------------------------------------------------------------------===//
591
Tom Stellard75aadc22012-12-11 21:25:42 +0000592SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000593 MachineFunction &MF = DAG.getMachineFunction();
594 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard75aadc22012-12-11 21:25:42 +0000595 switch (Op.getOpcode()) {
596 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Tom Stellard880a80a2014-06-17 16:53:14 +0000597 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
598 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
Jan Vesely25f36272014-06-18 12:27:13 +0000599 case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
Jan Veselyecf51332014-06-18 12:27:17 +0000600 case ISD::SRA_PARTS:
Jan Vesely900ff2e2014-06-18 12:27:15 +0000601 case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
Jan Vesely808fff52015-04-30 17:15:56 +0000602 case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
603 case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000604 case ISD::FCOS:
605 case ISD::FSIN: return LowerTrig(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000606 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000607 case ISD::STORE: return LowerSTORE(Op, DAG);
Matt Arsenaultd2c9e082014-07-07 18:34:45 +0000608 case ISD::LOAD: {
609 SDValue Result = LowerLOAD(Op, DAG);
610 assert((!Result.getNode() ||
611 Result.getNode()->getNumValues() == 2) &&
612 "Load should return a value and a chain");
613 return Result;
614 }
615
Matt Arsenault1d555c42014-06-23 18:00:55 +0000616 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000617 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
Matt Arsenault81d06012016-03-07 21:10:13 +0000618 case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000619 case ISD::INTRINSIC_VOID: {
620 SDValue Chain = Op.getOperand(0);
621 unsigned IntrinsicID =
622 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
623 switch (IntrinsicID) {
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000624 case AMDGPUIntrinsic::R600_store_swizzle: {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000625 SDLoc DL(Op);
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000626 const SDValue Args[8] = {
627 Chain,
628 Op.getOperand(2), // Export Value
629 Op.getOperand(3), // ArrayBase
630 Op.getOperand(4), // Type
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000631 DAG.getConstant(0, DL, MVT::i32), // SWZ_X
632 DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
633 DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
634 DAG.getConstant(3, DL, MVT::i32) // SWZ_W
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000635 };
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000636 return DAG.getNode(AMDGPUISD::EXPORT, DL, Op.getValueType(), Args);
Tom Stellard75aadc22012-12-11 21:25:42 +0000637 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000638
Tom Stellard75aadc22012-12-11 21:25:42 +0000639 // default for switch(IntrinsicID)
640 default: break;
641 }
642 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
643 break;
644 }
645 case ISD::INTRINSIC_WO_CHAIN: {
646 unsigned IntrinsicID =
647 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
648 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000649 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000650 switch(IntrinsicID) {
651 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeunef143af32013-11-11 22:10:24 +0000652 case AMDGPUIntrinsic::R600_interp_xy:
653 case AMDGPUIntrinsic::R600_interp_zw: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000654 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Tom Stellard41afe6a2013-02-05 17:09:14 +0000655 MachineSDNode *interp;
Vincent Lejeunef143af32013-11-11 22:10:24 +0000656 SDValue RegisterINode = Op.getOperand(2);
657 SDValue RegisterJNode = Op.getOperand(3);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000658
Vincent Lejeunef143af32013-11-11 22:10:24 +0000659 if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
Tom Stellard41afe6a2013-02-05 17:09:14 +0000660 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000661 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000662 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000663 else
664 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000665 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000666 RegisterJNode, RegisterINode);
Ahmed Bougacha128f8732016-04-26 21:15:30 +0000667 return DAG.getBuildVector(MVT::v2f32, DL,
668 {SDValue(interp, 0), SDValue(interp, 1)});
Tom Stellard75aadc22012-12-11 21:25:42 +0000669 }
Matt Arsenault59bd3012016-01-22 19:00:09 +0000670 case AMDGPUIntrinsic::r600_tex:
671 case AMDGPUIntrinsic::r600_texc:
672 case AMDGPUIntrinsic::r600_txl:
673 case AMDGPUIntrinsic::r600_txlc:
674 case AMDGPUIntrinsic::r600_txb:
675 case AMDGPUIntrinsic::r600_txbc:
676 case AMDGPUIntrinsic::r600_txf:
677 case AMDGPUIntrinsic::r600_txq:
678 case AMDGPUIntrinsic::r600_ddx:
679 case AMDGPUIntrinsic::r600_ddy:
Vincent Lejeune6df39432013-10-02 16:00:33 +0000680 case AMDGPUIntrinsic::R600_ldptr: {
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000681 unsigned TextureOp;
682 switch (IntrinsicID) {
Matt Arsenault59bd3012016-01-22 19:00:09 +0000683 case AMDGPUIntrinsic::r600_tex:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000684 TextureOp = 0;
685 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000686 case AMDGPUIntrinsic::r600_texc:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000687 TextureOp = 1;
688 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000689 case AMDGPUIntrinsic::r600_txl:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000690 TextureOp = 2;
691 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000692 case AMDGPUIntrinsic::r600_txlc:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000693 TextureOp = 3;
694 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000695 case AMDGPUIntrinsic::r600_txb:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000696 TextureOp = 4;
697 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000698 case AMDGPUIntrinsic::r600_txbc:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000699 TextureOp = 5;
700 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000701 case AMDGPUIntrinsic::r600_txf:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000702 TextureOp = 6;
703 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000704 case AMDGPUIntrinsic::r600_txq:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000705 TextureOp = 7;
706 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000707 case AMDGPUIntrinsic::r600_ddx:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000708 TextureOp = 8;
709 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000710 case AMDGPUIntrinsic::r600_ddy:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000711 TextureOp = 9;
712 break;
Vincent Lejeune6df39432013-10-02 16:00:33 +0000713 case AMDGPUIntrinsic::R600_ldptr:
714 TextureOp = 10;
715 break;
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000716 default:
717 llvm_unreachable("Unknow Texture Operation");
718 }
719
720 SDValue TexArgs[19] = {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000721 DAG.getConstant(TextureOp, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000722 Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000723 DAG.getConstant(0, DL, MVT::i32),
724 DAG.getConstant(1, DL, MVT::i32),
725 DAG.getConstant(2, DL, MVT::i32),
726 DAG.getConstant(3, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000727 Op.getOperand(2),
728 Op.getOperand(3),
729 Op.getOperand(4),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000730 DAG.getConstant(0, DL, MVT::i32),
731 DAG.getConstant(1, DL, MVT::i32),
732 DAG.getConstant(2, DL, MVT::i32),
733 DAG.getConstant(3, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000734 Op.getOperand(5),
735 Op.getOperand(6),
736 Op.getOperand(7),
737 Op.getOperand(8),
738 Op.getOperand(9),
739 Op.getOperand(10)
740 };
Craig Topper48d114b2014-04-26 18:35:24 +0000741 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000742 }
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000743 case AMDGPUIntrinsic::AMDGPU_dp4: {
744 SDValue Args[8] = {
745 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000746 DAG.getConstant(0, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000747 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000748 DAG.getConstant(0, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000749 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000750 DAG.getConstant(1, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000751 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000752 DAG.getConstant(1, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000753 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000754 DAG.getConstant(2, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000755 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000756 DAG.getConstant(2, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000757 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000758 DAG.getConstant(3, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000759 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000760 DAG.getConstant(3, DL, MVT::i32))
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000761 };
Craig Topper48d114b2014-04-26 18:35:24 +0000762 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000763 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000764
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000765 case Intrinsic::r600_read_ngroups_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000766 return LowerImplicitParameter(DAG, VT, DL, 0);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000767 case Intrinsic::r600_read_ngroups_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000768 return LowerImplicitParameter(DAG, VT, DL, 1);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000769 case Intrinsic::r600_read_ngroups_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000770 return LowerImplicitParameter(DAG, VT, DL, 2);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000771 case Intrinsic::r600_read_global_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000772 return LowerImplicitParameter(DAG, VT, DL, 3);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000773 case Intrinsic::r600_read_global_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000774 return LowerImplicitParameter(DAG, VT, DL, 4);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000775 case Intrinsic::r600_read_global_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000776 return LowerImplicitParameter(DAG, VT, DL, 5);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000777 case Intrinsic::r600_read_local_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000778 return LowerImplicitParameter(DAG, VT, DL, 6);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000779 case Intrinsic::r600_read_local_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000780 return LowerImplicitParameter(DAG, VT, DL, 7);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000781 case Intrinsic::r600_read_local_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000782 return LowerImplicitParameter(DAG, VT, DL, 8);
783
Matt Arsenaultbef34e22016-01-22 21:30:34 +0000784 case Intrinsic::r600_read_workdim:
785 case AMDGPUIntrinsic::AMDGPU_read_workdim: { // Legacy name.
Tom Stellarddcb9f092015-07-09 21:20:37 +0000786 uint32_t ByteOffset = getImplicitParameterOffset(MFI, GRID_DIM);
787 return LowerImplicitParameter(DAG, VT, DL, ByteOffset / 4);
788 }
Jan Veselye5121f32014-10-14 20:05:26 +0000789
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000790 case Intrinsic::r600_read_tgid_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000791 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
792 AMDGPU::T1_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000793 case Intrinsic::r600_read_tgid_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000794 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
795 AMDGPU::T1_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000796 case Intrinsic::r600_read_tgid_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000797 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
798 AMDGPU::T1_Z, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000799 case Intrinsic::r600_read_tidig_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000800 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
801 AMDGPU::T0_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000802 case Intrinsic::r600_read_tidig_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000803 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
804 AMDGPU::T0_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000805 case Intrinsic::r600_read_tidig_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000806 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
807 AMDGPU::T0_Z, VT);
Matt Arsenaultbef34e22016-01-22 21:30:34 +0000808
809 // FIXME: Should be renamed to r600 prefix
Matt Arsenault0c3e2332016-01-26 04:14:16 +0000810 case AMDGPUIntrinsic::AMDGPU_rsq_clamped:
Matt Arsenault79963e82016-02-13 01:03:00 +0000811 return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
Matt Arsenaultbef34e22016-01-22 21:30:34 +0000812
813 case Intrinsic::r600_rsq:
Matt Arsenault0c3e2332016-01-26 04:14:16 +0000814 case AMDGPUIntrinsic::AMDGPU_rsq: // Legacy name
Matt Arsenault257d48d2014-06-24 22:13:39 +0000815 // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
816 return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000817 }
818 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
819 break;
820 }
821 } // end switch(Op.getOpcode())
822 return SDValue();
823}
824
/// Replace the results of an illegal-typed node with legal equivalents.
/// Handles the R600-specific cases and defers everything else to the
/// common AMDGPU implementation.
void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    // Not an R600-specific case; let the shared AMDGPU lowering handle it.
    AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
    return;
  case ISD::FP_TO_UINT:
    // Only the i1 result has a dedicated lowering (a SETNE against 0.0);
    // wider results reuse the signed expansion below.
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
      return;
    }
    // Fall-through. Since we don't care about out of bounds values
    // we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint
    // considers some extra cases which are not necessary here.
  case ISD::FP_TO_SINT: {
    SDValue Result;
    if (expandFP_TO_SINT(N, Result, DAG))
      Results.push_back(Result);
    return;
  }
  case ISD::SDIVREM: {
    // Lower via the remainder result (value #1); LowerSDIVREM produces a
    // two-result node whose values are pushed in (quotient, remainder) order.
    SDValue Op = SDValue(N, 1);
    SDValue RES = LowerSDIVREM(Op, DAG);
    Results.push_back(RES);
    Results.push_back(RES.getValue(1));
    break;
  }
  case ISD::UDIVREM: {
    // 64-bit unsigned divrem expansion appends its results itself.
    SDValue Op = SDValue(N, 0);
    LowerUDIVREM64(Op, DAG, Results);
    break;
  }
  }
}
860
Tom Stellard880a80a2014-06-17 16:53:14 +0000861SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
862 SDValue Vector) const {
863
864 SDLoc DL(Vector);
865 EVT VecVT = Vector.getValueType();
866 EVT EltVT = VecVT.getVectorElementType();
867 SmallVector<SDValue, 8> Args;
868
869 for (unsigned i = 0, e = VecVT.getVectorNumElements();
870 i != e; ++i) {
Mehdi Amini44ede332015-07-09 02:09:04 +0000871 Args.push_back(DAG.getNode(
872 ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
873 DAG.getConstant(i, DL, getVectorIdxTy(DAG.getDataLayout()))));
Tom Stellard880a80a2014-06-17 16:53:14 +0000874 }
875
876 return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
877}
878
879SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
880 SelectionDAG &DAG) const {
881
882 SDLoc DL(Op);
883 SDValue Vector = Op.getOperand(0);
884 SDValue Index = Op.getOperand(1);
885
886 if (isa<ConstantSDNode>(Index) ||
887 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
888 return Op;
889
890 Vector = vectorToVerticalVector(DAG, Vector);
891 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
892 Vector, Index);
893}
894
895SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
896 SelectionDAG &DAG) const {
897 SDLoc DL(Op);
898 SDValue Vector = Op.getOperand(0);
899 SDValue Value = Op.getOperand(1);
900 SDValue Index = Op.getOperand(2);
901
902 if (isa<ConstantSDNode>(Index) ||
903 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
904 return Op;
905
906 Vector = vectorToVerticalVector(DAG, Vector);
907 SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
908 Vector, Value, Index);
909 return vectorToVerticalVector(DAG, Insert);
910}
911
Tom Stellard27233b72016-05-02 18:05:17 +0000912SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
913 SDValue Op,
914 SelectionDAG &DAG) const {
915
916 GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
917 if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
918 return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
919
920 const DataLayout &DL = DAG.getDataLayout();
921 const GlobalValue *GV = GSD->getGlobal();
Tom Stellard27233b72016-05-02 18:05:17 +0000922 MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
923
Jan Veselyf97de002016-05-13 20:39:29 +0000924 SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
925 return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
Tom Stellard27233b72016-05-02 18:05:17 +0000926}
927
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000928SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
929 // On hw >= R700, COS/SIN input must be between -1. and 1.
930 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
931 EVT VT = Op.getValueType();
932 SDValue Arg = Op.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000933 SDLoc DL(Op);
Sanjay Patela2607012015-09-16 16:31:21 +0000934
935 // TODO: Should this propagate fast-math-flags?
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000936 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
937 DAG.getNode(ISD::FADD, DL, VT,
938 DAG.getNode(ISD::FMUL, DL, VT, Arg,
939 DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
940 DAG.getConstantFP(0.5, DL, MVT::f32)));
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000941 unsigned TrigNode;
942 switch (Op.getOpcode()) {
943 case ISD::FCOS:
944 TrigNode = AMDGPUISD::COS_HW;
945 break;
946 case ISD::FSIN:
947 TrigNode = AMDGPUISD::SIN_HW;
948 break;
949 default:
950 llvm_unreachable("Wrong trig opcode");
951 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000952 SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
953 DAG.getNode(ISD::FADD, DL, VT, FractPart,
954 DAG.getConstantFP(-0.5, DL, MVT::f32)));
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000955 if (Gen >= AMDGPUSubtarget::R700)
956 return TrigVal;
957 // On R600 hw, COS/SIN input must be between -Pi and Pi.
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000958 return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
959 DAG.getConstantFP(3.14159265359, DL, MVT::f32));
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000960}
961
/// Expand SHL_PARTS: a wide left shift expressed over a (Lo, Hi) pair of
/// VT-sized halves with a variable shift amount. Produces the shifted
/// (Lo, Hi) pair via MERGE_VALUES. Both the "small shift" (< bit width) and
/// "big shift" (>= bit width) cases are computed and selected between with
/// an unsigned compare, since the shift amount is only known at runtime.
SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);

  // Width = bit width of one half; Width1 = Width - 1.
  SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  // BigShift: how far past one half the shift reaches (used when
  // Shift >= Width). CompShift: (Width - 1) - Shift, the complementary
  // amount used to capture the bits that spill from Lo into Hi.
  SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps, the alternative is to
  // add a conditional to filter the special case.

  // Bits shifted out of the top of Lo, to be OR'd into Hi.
  SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
  Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);

  // Shift < Width: Hi takes its own shifted bits plus Lo's overflow.
  SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
  HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
  SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);

  // Shift >= Width: Lo becomes zero and Hi is Lo shifted by the excess.
  SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
  SDValue LoBig = Zero;

  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
}
997
/// Expand SRL_PARTS/SRA_PARTS: a wide right shift over a (Lo, Hi) pair of
/// VT-sized halves with a variable shift amount. Mirrors LowerSHLParts, but
/// shifting right; for SRA_PARTS the sign-filling variants are used, and the
/// "big shift" Hi result is the sign replicated across the whole half.
SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);

  // Arithmetic (sign-preserving) vs. logical right shift.
  const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;

  // Width = bit width of one half; Width1 = Width - 1.
  SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  // BigShift: excess beyond one half (Shift >= Width case).
  // CompShift: complementary amount used to capture Hi's spill into Lo.
  SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps, the alternative is to
  // add a conditional to filter the special case.

  // Bits shifted out of the bottom of Hi, to be OR'd into Lo.
  SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
  Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);

  // Shift < Width: Hi shifts by Shift (arithmetic if SRA); Lo takes its own
  // shifted bits plus Hi's overflow.
  SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
  SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
  LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);

  // Shift >= Width: Lo comes from Hi shifted by the excess; Hi is either the
  // replicated sign bit (SRA) or zero (SRL).
  SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
  SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;

  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
}
1035
Jan Vesely808fff52015-04-30 17:15:56 +00001036SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
1037 unsigned mainop, unsigned ovf) const {
1038 SDLoc DL(Op);
1039 EVT VT = Op.getValueType();
1040
1041 SDValue Lo = Op.getOperand(0);
1042 SDValue Hi = Op.getOperand(1);
1043
1044 SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
1045 // Extend sign.
1046 OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
1047 DAG.getValueType(MVT::i1));
1048
1049 SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);
1050
1051 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
1052}
1053
Tom Stellard75aadc22012-12-11 21:25:42 +00001054SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001055 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001056 return DAG.getNode(
1057 ISD::SETCC,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001058 DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001059 MVT::i1,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001060 Op, DAG.getConstantFP(0.0f, DL, MVT::f32),
Tom Stellard75aadc22012-12-11 21:25:42 +00001061 DAG.getCondCode(ISD::SETNE)
1062 );
1063}
1064
Tom Stellard75aadc22012-12-11 21:25:42 +00001065SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001066 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001067 unsigned DwordOffset) const {
1068 unsigned ByteOffset = DwordOffset * 4;
1069 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +00001070 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +00001071
1072 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
1073 assert(isInt<16>(ByteOffset));
1074
1075 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001076 DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
Tom Stellard75aadc22012-12-11 21:25:42 +00001077 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
1078 false, false, false, 0);
1079}
1080
Tom Stellard75aadc22012-12-11 21:25:42 +00001081bool R600TargetLowering::isZero(SDValue Op) const {
1082 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
1083 return Cst->isNullValue();
1084 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
1085 return CstFP->isZero();
1086 } else {
1087 return false;
1088 }
1089}
1090
Matt Arsenault6b6a2c32016-03-11 08:00:27 +00001091bool R600TargetLowering::isHWTrueValue(SDValue Op) const {
1092 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
1093 return CFP->isExactlyValue(1.0);
1094 }
1095 return isAllOnesConstant(Op);
1096}
1097
1098bool R600TargetLowering::isHWFalseValue(SDValue Op) const {
1099 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
1100 return CFP->getValueAPF().isZero();
1101 }
1102 return isNullConstant(Op);
1103}
1104
/// Lower SELECT_CC for R600. Tries, in order:
///  1. an fmin/fmax-legacy combine for f32,
///  2. a native SET* instruction (hardware true/false result values),
///  3. a native CND* instruction (comparison against zero),
/// and otherwise splits the node into two supported SELECT_CC operations.
/// Operand/condition-code swaps below are order-sensitive.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // First see whether this select_cc is really a legacy min/max.
  if (VT == MVT::f32) {
    DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
    SDValue MinMax = CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
    if (MinMax)
      return MinMax;
  }

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
     ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  // If (False, True) are the hardware (true, false) values, invert the
  // condition so they land in the (True, False) slots, possibly also
  // swapping the compare operands to reach a legal condition code.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the conditon and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        // Inverting the condition requires swapping True/False as well.
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    // CND* has no "not equal" form; fold NE-style codes into their inverse
    // and swap the select arms to compensate.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this for it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, DL, CompareVT);
    HWFalse = DAG.getConstant(0, DL, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
1246
Alp Tokercb402912014-01-24 17:20:08 +00001247/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001248/// convert these pointers to a register index. Each register holds
1249/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
1250/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
1251/// for indirect addressing.
1252SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1253 unsigned StackWidth,
1254 SelectionDAG &DAG) const {
1255 unsigned SRLPad;
1256 switch(StackWidth) {
1257 case 1:
1258 SRLPad = 2;
1259 break;
1260 case 2:
1261 SRLPad = 3;
1262 break;
1263 case 4:
1264 SRLPad = 4;
1265 break;
1266 default: llvm_unreachable("Invalid stack width");
1267 }
1268
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001269 SDLoc DL(Ptr);
1270 return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
1271 DAG.getConstant(SRLPad, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001272}
1273
/// Map a vector-element index \p ElemIdx onto a (sub-register channel,
/// pointer increment) pair for stack accesses of the given \p StackWidth
/// (how many of the 4 channels each stack slot uses).
/// \p Channel receives the channel within the slot; \p PtrIncr receives 1
/// when the element spills into the next slot, 0 otherwise.
void R600TargetLowering::getStackAddress(unsigned StackWidth,
                                         unsigned ElemIdx,
                                         unsigned &Channel,
                                         unsigned &PtrIncr) const {
  switch (StackWidth) {
  default:
  case 1:
    // One channel per slot: every element beyond the first moves to the
    // next slot.
    Channel = 0;
    if (ElemIdx > 0) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 2:
    // Two channels per slot: channel alternates, elements 2+ are in the
    // next slot.
    // NOTE(review): the `ElemIdx == 2` test leaves PtrIncr = 0 for
    // ElemIdx == 3; it is unclear from this file whether `ElemIdx > 1`
    // was intended — confirm against callers before changing.
    Channel = ElemIdx % 2;
    if (ElemIdx == 2) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 4:
    // Full-width slot: the element index is the channel; never advance.
    Channel = ElemIdx;
    PtrIncr = 0;
    break;
  }
}
1302
/// Lower a truncating store (i8/i16) to private memory as a read-modify-write
/// of the containing 32-bit register: load the dword, clear the destination
/// byte/halfword lane, OR in the shifted value, and store the dword back.
SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Store);

  // Lane mask for the stored element within the dword.
  // NOTE(review): for a memory VT other than i8/i16 (e.g. i1) Mask stays 0,
  // which would clear nothing and store only the OR'd value bits — confirm
  // callers only reach here with i8/i16.
  unsigned Mask = 0;
  if (Store->getMemoryVT() == MVT::i8) {
    Mask = 0xff;
  } else if (Store->getMemoryVT() == MVT::i16) {
    Mask = 0xffff;
  }

  SDValue Chain = Store->getChain();
  SDValue BasePtr = Store->getBasePtr();
  EVT MemVT = Store->getMemoryVT();

  // Dword-align the byte address and load the current register contents.
  SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, BasePtr,
                            DAG.getConstant(2, DL, MVT::i32));
  SDValue Dst = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
                            Chain, Ptr,
                            DAG.getTargetConstant(0, DL, MVT::i32));

  // Byte offset within the dword, converted to a bit shift (x8).
  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, BasePtr,
                                DAG.getConstant(0x3, DL, MVT::i32));

  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
                                 DAG.getConstant(3, DL, MVT::i32));

  // Widen the value to i32, then keep only the low MemVT bits.
  SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
                                  Store->getValue());

  SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);

  // Move the value into its lane.
  SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
                                     MaskedValue, ShiftAmt);

  // Build the inverted lane mask and clear the destination lane.
  SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32,
                                DAG.getConstant(Mask, DL, MVT::i32),
                                ShiftAmt);
  DstMask = DAG.getNode(ISD::XOR, DL, MVT::i32, DstMask,
                        DAG.getConstant(0xffffffff, DL, MVT::i32));
  Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);

  // Merge and write the dword back.
  SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
  return DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
                     Chain, Value, Ptr,
                     DAG.getTargetConstant(0, DL, MVT::i32));
}
1350
1351SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1352 if (SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG))
Tom Stellardfbab8272013-08-16 01:12:11 +00001353 return Result;
Tom Stellardfbab8272013-08-16 01:12:11 +00001354
Matt Arsenault95245662016-02-11 05:32:46 +00001355 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1356 unsigned AS = StoreNode->getAddressSpace();
1357 SDValue Value = StoreNode->getValue();
1358 EVT ValueVT = Value.getValueType();
1359
1360 if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
1361 ValueVT.isVector()) {
1362 return SplitVectorStore(Op, DAG);
1363 }
1364
1365 SDLoc DL(Op);
1366 SDValue Chain = StoreNode->getChain();
1367 SDValue Ptr = StoreNode->getBasePtr();
1368
1369 if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001370 if (StoreNode->isTruncatingStore()) {
1371 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001372 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001373 EVT MemVT = StoreNode->getMemoryVT();
1374 SDValue MaskConstant;
1375 if (MemVT == MVT::i8) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001376 MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001377 } else {
1378 assert(MemVT == MVT::i16);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001379 MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001380 }
1381 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001382 DAG.getConstant(2, DL, MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001383 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001384 DAG.getConstant(0x00000003, DL, VT));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001385 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1386 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001387 DAG.getConstant(3, DL, VT));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001388 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1389 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
1390 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1391 // vector instead.
1392 SDValue Src[4] = {
1393 ShiftedValue,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001394 DAG.getConstant(0, DL, MVT::i32),
1395 DAG.getConstant(0, DL, MVT::i32),
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001396 Mask
1397 };
Ahmed Bougacha128f8732016-04-26 21:15:30 +00001398 SDValue Input = DAG.getBuildVector(MVT::v4i32, DL, Src);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001399 SDValue Args[3] = { Chain, Input, DWordAddr };
1400 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
Craig Topper206fcd42014-04-26 19:29:41 +00001401 Op->getVTList(), Args, MemVT,
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001402 StoreNode->getMemOperand());
1403 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
Matt Arsenault95245662016-02-11 05:32:46 +00001404 ValueVT.bitsGE(MVT::i32)) {
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001405 // Convert pointer from byte address to dword address.
1406 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1407 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001408 Ptr, DAG.getConstant(2, DL, MVT::i32)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001409
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001410 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
Matt Arsenaulteaa3a7e2013-12-10 21:37:42 +00001411 llvm_unreachable("Truncated and indexed stores not supported yet");
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001412 } else {
1413 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1414 }
1415 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001416 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001417 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001418
Matt Arsenault95245662016-02-11 05:32:46 +00001419 if (AS != AMDGPUAS::PRIVATE_ADDRESS)
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001420 return SDValue();
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001421
Matt Arsenault95245662016-02-11 05:32:46 +00001422 EVT MemVT = StoreNode->getMemoryVT();
1423 if (MemVT.bitsLT(MVT::i32))
1424 return lowerPrivateTruncStore(StoreNode, DAG);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001425
Ahmed Bougachaf8dfb472016-02-09 22:54:12 +00001426 // Lowering for indirect addressing
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001427 const MachineFunction &MF = DAG.getMachineFunction();
Eric Christopher7792e322015-01-30 23:24:40 +00001428 const AMDGPUFrameLowering *TFL =
1429 static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001430 unsigned StackWidth = TFL->getStackWidth(MF);
1431
1432 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1433
1434 if (ValueVT.isVector()) {
1435 unsigned NumElemVT = ValueVT.getVectorNumElements();
1436 EVT ElemVT = ValueVT.getVectorElementType();
Craig Topper48d114b2014-04-26 18:35:24 +00001437 SmallVector<SDValue, 4> Stores(NumElemVT);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001438
1439 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1440 "vector width in load");
1441
1442 for (unsigned i = 0; i < NumElemVT; ++i) {
1443 unsigned Channel, PtrIncr;
1444 getStackAddress(StackWidth, i, Channel, PtrIncr);
1445 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001446 DAG.getConstant(PtrIncr, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001447 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001448 Value, DAG.getConstant(i, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001449
1450 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1451 Chain, Elem, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001452 DAG.getTargetConstant(Channel, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001453 }
Craig Topper48d114b2014-04-26 18:35:24 +00001454 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001455 } else {
1456 if (ValueVT == MVT::i8) {
1457 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1458 }
1459 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001460 DAG.getTargetConstant(0, DL, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001461 }
1462
1463 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001464}
1465
Tom Stellard365366f2013-01-23 02:09:06 +00001466// return (512 + (kc_bank << 12)
1467static int
1468ConstantAddressBlock(unsigned AddressSpace) {
1469 switch (AddressSpace) {
1470 case AMDGPUAS::CONSTANT_BUFFER_0:
1471 return 512;
1472 case AMDGPUAS::CONSTANT_BUFFER_1:
1473 return 512 + 4096;
1474 case AMDGPUAS::CONSTANT_BUFFER_2:
1475 return 512 + 4096 * 2;
1476 case AMDGPUAS::CONSTANT_BUFFER_3:
1477 return 512 + 4096 * 3;
1478 case AMDGPUAS::CONSTANT_BUFFER_4:
1479 return 512 + 4096 * 4;
1480 case AMDGPUAS::CONSTANT_BUFFER_5:
1481 return 512 + 4096 * 5;
1482 case AMDGPUAS::CONSTANT_BUFFER_6:
1483 return 512 + 4096 * 6;
1484 case AMDGPUAS::CONSTANT_BUFFER_7:
1485 return 512 + 4096 * 7;
1486 case AMDGPUAS::CONSTANT_BUFFER_8:
1487 return 512 + 4096 * 8;
1488 case AMDGPUAS::CONSTANT_BUFFER_9:
1489 return 512 + 4096 * 9;
1490 case AMDGPUAS::CONSTANT_BUFFER_10:
1491 return 512 + 4096 * 10;
1492 case AMDGPUAS::CONSTANT_BUFFER_11:
1493 return 512 + 4096 * 11;
1494 case AMDGPUAS::CONSTANT_BUFFER_12:
1495 return 512 + 4096 * 12;
1496 case AMDGPUAS::CONSTANT_BUFFER_13:
1497 return 512 + 4096 * 13;
1498 case AMDGPUAS::CONSTANT_BUFFER_14:
1499 return 512 + 4096 * 14;
1500 case AMDGPUAS::CONSTANT_BUFFER_15:
1501 return 512 + 4096 * 15;
1502 default:
1503 return -1;
1504 }
1505}
1506
Matt Arsenault6dfda962016-02-10 18:21:39 +00001507SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
1508 SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001509 SDLoc DL(Op);
Matt Arsenault6dfda962016-02-10 18:21:39 +00001510 LoadSDNode *Load = cast<LoadSDNode>(Op);
1511 ISD::LoadExtType ExtType = Load->getExtensionType();
1512 EVT MemVT = Load->getMemoryVT();
Tom Stellard365366f2013-01-23 02:09:06 +00001513
Matt Arsenault6dfda962016-02-10 18:21:39 +00001514 // <SI && AS=PRIVATE && EXTLOAD && size < 32bit,
1515 // register (2-)byte extract.
1516
1517 // Get Register holding the target.
1518 SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Load->getBasePtr(),
1519 DAG.getConstant(2, DL, MVT::i32));
1520 // Load the Register.
1521 SDValue Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(),
1522 Load->getChain(),
1523 Ptr,
1524 DAG.getTargetConstant(0, DL, MVT::i32),
1525 Op.getOperand(2));
1526
1527 // Get offset within the register.
1528 SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
1529 Load->getBasePtr(),
1530 DAG.getConstant(0x3, DL, MVT::i32));
1531
1532 // Bit offset of target byte (byteIdx * 8).
1533 SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1534 DAG.getConstant(3, DL, MVT::i32));
1535
1536 // Shift to the right.
1537 Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Ret, ShiftAmt);
1538
1539 // Eliminate the upper bits by setting them to ...
1540 EVT MemEltVT = MemVT.getScalarType();
1541
1542 // ... ones.
1543 if (ExtType == ISD::SEXTLOAD) {
1544 SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
1545
1546 SDValue Ops[] = {
1547 DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode),
1548 Load->getChain()
1549 };
1550
1551 return DAG.getMergeValues(Ops, DL);
1552 }
1553
1554 // ... or zeros.
1555 SDValue Ops[] = {
1556 DAG.getZeroExtendInReg(Ret, DL, MemEltVT),
1557 Load->getChain()
1558 };
1559
1560 return DAG.getMergeValues(Ops, DL);
1561}
1562
/// Custom lowering for ISD::LOAD.
/// Handles, in order: private sub-dword extending loads, local vector loads
/// (scalarized), constant-buffer loads (folded to CONST_ADDRESS nodes),
/// unsupported SEXT loads (expanded to extload + sign_extend_inreg), and
/// finally private loads via indirect register addressing. All other address
/// spaces fall through to the default legalizer by returning SDValue().
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  unsigned AS = LoadNode->getAddressSpace();
  EVT MemVT = LoadNode->getMemoryVT();
  ISD::LoadExtType ExtType = LoadNode->getExtensionType();

  // Sub-dword extending loads from private memory need a register
  // extract sequence; see lowerPrivateExtLoad.
  if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
      ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
    return lowerPrivateExtLoad(Op, DAG);
  }

  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue Chain = LoadNode->getChain();
  SDValue Ptr = LoadNode->getBasePtr();

  // Vector loads from local memory are split into scalar loads.
  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      scalarizeVectorLoad(LoadNode, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Constant-buffer loads (NON_EXTLOAD / ZEXTLOAD only) become
  // CONST_ADDRESS nodes addressing the kernel constant block.
  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    // A compile-time-known pointer can be folded to per-channel constant
    // addresses; otherwise keep a dynamically indexed v4i32 read below.
    if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
        isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      // Scalar results still build a vector here; element 0 is extracted
      // after this if/else.
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getBuildVector(NewVT, DL, makeArrayRef(Slots, NumElements));
    } else {
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
                      DAG.getConstant(4, DL, MVT::i32)),
                      DAG.getConstant(LoadNode->getAddressSpace() -
                                      AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
          );
    }

    // For a scalar load, only channel 0 of the constant fetch is needed.
    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, DL, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  SDValue LoweredLoad;

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    // NOTE: this MemVT intentionally shadows the one declared at the top.
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    // Expand to an any-extending load followed by an in-register sign extend.
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                     LoadNode->getPointerInfo(), MemVT,
                                     LoadNode->isVolatile(),
                                     LoadNode->isNonTemporal(),
                                     LoadNode->isInvariant(),
                                     LoadNode->getAlignment());
    SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
                              DAG.getValueType(MemVT));

    SDValue MergedValues[2] = { Res, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Everything past this point is private-memory indirect addressing;
  // other address spaces use the default expansion.
  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL =
      static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  // Convert the byte pointer into a register index (see stackPtrToRegIndex).
  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT <= 4);
    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    // Emit one REGISTER_LOAD per element, advancing the register index as
    // dictated by the stack width.
    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, DL, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, DL, MVT::i32),
                             Op.getOperand(2));
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElemVT);
    LoweredLoad = DAG.getBuildVector(TargetVT, DL, makeArrayRef(Loads, NumElemVT));
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, DL, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2] = {
    LoweredLoad,
    Chain
  };

  return DAG.getMergeValues(Ops, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001707
Matt Arsenault1d555c42014-06-23 18:00:55 +00001708SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1709 SDValue Chain = Op.getOperand(0);
1710 SDValue Cond = Op.getOperand(1);
1711 SDValue Jump = Op.getOperand(2);
1712
1713 return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1714 Chain, Jump, Cond);
1715}
1716
Matt Arsenault81d06012016-03-07 21:10:13 +00001717SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
1718 SelectionDAG &DAG) const {
1719 MachineFunction &MF = DAG.getMachineFunction();
1720 const AMDGPUFrameLowering *TFL = Subtarget->getFrameLowering();
1721
1722 FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
1723
1724 unsigned FrameIndex = FIN->getIndex();
1725 unsigned IgnoredFrameReg;
1726 unsigned Offset =
1727 TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
1728 return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), SDLoc(Op),
1729 Op.getValueType());
1730}
1731
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
///
/// Shader arguments arrive as live-in 128-bit registers; kernel arguments
/// are loaded from CONSTANT_BUFFER_0, after the 36-byte dispatch header.
SDValue R600TargetLowering::LowerFormalArguments(
    SDValue Chain,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    SDLoc DL, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();

  // Arguments as seen in the original IR signature (before any splitting
  // done for register assignment) drive the location analysis.
  SmallVector<ISD::InputArg, 8> LocalIns;

  getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);

  AnalyzeFormalArguments(CCInfo, LocalIns);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    const ISD::InputArg &In = Ins[i];
    EVT VT = In.VT;
    EVT MemVT = VA.getLocVT();
    if (!VT.isVector() && MemVT.isVector()) {
      // Get load source type if scalarized.
      MemVT = MemVT.getVectorElementType();
    }

    // Shader calling conventions receive arguments in registers, one
    // 128-bit register per argument.
    if (AMDGPU::isShader(CallConv)) {
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    // Kernel arguments are read from constant buffer 0.
    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
    // being invalid. Somehow this seems to work with i64 arguments, but breaks
    // for <1 x i64>.

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.
    ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
    if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
      // FIXME: This should really check the extload type, but the handling of
      // extload vector parameters seems to be broken.

      // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
      Ext = ISD::SEXTLOAD;
    }

    // Compute the offset from the value.
    // XXX - I think PartOffset should give you this, but it seems to give the
    // size of the register which isn't useful.

    // ValBase: buffer offset of the first piece of this (possibly split)
    // original argument; PartOffset - ValBase is the offset within it.
    unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
    unsigned PartOffset = VA.getLocMemOffset();
    // 36-byte skip for the dispatch header described above.
    unsigned Offset = 36 + VA.getLocMemOffset();

    MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
    SDValue Arg = DAG.getLoad(ISD::UNINDEXED, Ext, VT, DL, Chain,
                              DAG.getConstant(Offset, DL, MVT::i32),
                              DAG.getUNDEF(MVT::i32),
                              PtrInfo,
                              MemVT, false, true, true, 4);

    // 4 is the preferred alignment for the CONSTANT memory space.
    InVals.push_back(Arg);
    // Record where the next argument would start, for ABI bookkeeping.
    MFI->ABIArgOffset = Offset + MemVT.getStoreSize();
  }
  return Chain;
}
1811
Mehdi Amini44ede332015-07-09 02:09:04 +00001812EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1813 EVT VT) const {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001814 if (!VT.isVector())
1815 return MVT::i32;
Tom Stellard75aadc22012-12-11 21:25:42 +00001816 return VT.changeVectorElementTypeToInteger();
1817}
1818
Matt Arsenaultfa67bdb2016-02-22 21:04:16 +00001819bool R600TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
1820 unsigned AddrSpace,
1821 unsigned Align,
1822 bool *IsFast) const {
1823 if (IsFast)
1824 *IsFast = false;
1825
1826 if (!VT.isSimple() || VT == MVT::Other)
1827 return false;
1828
1829 if (VT.bitsLT(MVT::i32))
1830 return false;
1831
1832 // TODO: This is a rough estimate.
1833 if (IsFast)
1834 *IsFast = true;
1835
1836 return VT.bitsGT(MVT::i32) && Align % 4 == 0;
1837}
1838
Matt Arsenault209a7b92014-04-18 07:40:20 +00001839static SDValue CompactSwizzlableVector(
1840 SelectionDAG &DAG, SDValue VectorEntry,
1841 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001842 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1843 assert(RemapSwizzle.empty());
1844 SDValue NewBldVec[4] = {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001845 VectorEntry.getOperand(0),
1846 VectorEntry.getOperand(1),
1847 VectorEntry.getOperand(2),
1848 VectorEntry.getOperand(3)
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001849 };
1850
1851 for (unsigned i = 0; i < 4; i++) {
Sanjay Patel57195842016-03-14 17:28:46 +00001852 if (NewBldVec[i].isUndef())
Vincent Lejeunefa58a5f2013-10-13 17:56:10 +00001853 // We mask write here to teach later passes that the ith element of this
1854 // vector is undef. Thus we can use it to reduce 128 bits reg usage,
1855 // break false dependencies and additionnaly make assembly easier to read.
1856 RemapSwizzle[i] = 7; // SEL_MASK_WRITE
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001857 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1858 if (C->isZero()) {
1859 RemapSwizzle[i] = 4; // SEL_0
1860 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1861 } else if (C->isExactlyValue(1.0)) {
1862 RemapSwizzle[i] = 5; // SEL_1
1863 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1864 }
1865 }
1866
Sanjay Patel57195842016-03-14 17:28:46 +00001867 if (NewBldVec[i].isUndef())
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001868 continue;
1869 for (unsigned j = 0; j < i; j++) {
1870 if (NewBldVec[i] == NewBldVec[j]) {
1871 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1872 RemapSwizzle[i] = j;
1873 break;
1874 }
1875 }
1876 }
1877
Ahmed Bougacha128f8732016-04-26 21:15:30 +00001878 return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
1879 NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001880}
1881
Benjamin Kramer193960c2013-06-11 13:32:25 +00001882static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1883 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001884 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1885 assert(RemapSwizzle.empty());
1886 SDValue NewBldVec[4] = {
1887 VectorEntry.getOperand(0),
1888 VectorEntry.getOperand(1),
1889 VectorEntry.getOperand(2),
1890 VectorEntry.getOperand(3)
1891 };
1892 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001893 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001894 RemapSwizzle[i] = i;
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001895 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1896 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1897 ->getZExtValue();
1898 if (i == Idx)
1899 isUnmovable[Idx] = true;
1900 }
1901 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001902
1903 for (unsigned i = 0; i < 4; i++) {
1904 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1905 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1906 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001907 if (isUnmovable[Idx])
1908 continue;
1909 // Swap i and Idx
1910 std::swap(NewBldVec[Idx], NewBldVec[i]);
1911 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1912 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001913 }
1914 }
1915
Ahmed Bougacha128f8732016-04-26 21:15:30 +00001916 return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
1917 NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001918}
1919
1920
1921SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001922 SDValue Swz[4], SelectionDAG &DAG,
1923 SDLoc DL) const {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001924 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1925 // Old -> New swizzle values
1926 DenseMap<unsigned, unsigned> SwizzleRemap;
1927
1928 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1929 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001930 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001931 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001932 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001933 }
1934
1935 SwizzleRemap.clear();
1936 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1937 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001938 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001939 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001940 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001941 }
1942
1943 return BuildVector;
1944}
1945
1946
Tom Stellard75aadc22012-12-11 21:25:42 +00001947//===----------------------------------------------------------------------===//
1948// Custom DAG Optimizations
1949//===----------------------------------------------------------------------===//
1950
1951SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1952 DAGCombinerInfo &DCI) const {
1953 SelectionDAG &DAG = DCI.DAG;
1954
1955 switch (N->getOpcode()) {
Tom Stellard50122a52014-04-07 19:45:41 +00001956 default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00001957 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1958 case ISD::FP_ROUND: {
1959 SDValue Arg = N->getOperand(0);
1960 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001961 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001962 Arg.getOperand(0));
1963 }
1964 break;
1965 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001966
1967 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1968 // (i32 select_cc f32, f32, -1, 0 cc)
1969 //
1970 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1971 // this to one of the SET*_DX10 instructions.
1972 case ISD::FP_TO_SINT: {
1973 SDValue FNeg = N->getOperand(0);
1974 if (FNeg.getOpcode() != ISD::FNEG) {
1975 return SDValue();
1976 }
1977 SDValue SelectCC = FNeg.getOperand(0);
1978 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1979 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1980 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1981 !isHWTrueValue(SelectCC.getOperand(2)) ||
1982 !isHWFalseValue(SelectCC.getOperand(3))) {
1983 return SDValue();
1984 }
1985
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001986 SDLoc dl(N);
1987 return DAG.getNode(ISD::SELECT_CC, dl, N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001988 SelectCC.getOperand(0), // LHS
1989 SelectCC.getOperand(1), // RHS
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001990 DAG.getConstant(-1, dl, MVT::i32), // True
1991 DAG.getConstant(0, dl, MVT::i32), // False
Tom Stellarde06163a2013-02-07 14:02:35 +00001992 SelectCC.getOperand(4)); // CC
1993
1994 break;
1995 }
Quentin Colombete2e05482013-07-30 00:27:16 +00001996
NAKAMURA Takumi8a046432013-10-28 04:07:38 +00001997 // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1998 // => build_vector elt0, ... , NewEltIdx, ... , eltN
Quentin Colombete2e05482013-07-30 00:27:16 +00001999 case ISD::INSERT_VECTOR_ELT: {
2000 SDValue InVec = N->getOperand(0);
2001 SDValue InVal = N->getOperand(1);
2002 SDValue EltNo = N->getOperand(2);
2003 SDLoc dl(N);
2004
2005 // If the inserted element is an UNDEF, just use the input vector.
Sanjay Patel57195842016-03-14 17:28:46 +00002006 if (InVal.isUndef())
Quentin Colombete2e05482013-07-30 00:27:16 +00002007 return InVec;
2008
2009 EVT VT = InVec.getValueType();
2010
2011 // If we can't generate a legal BUILD_VECTOR, exit
2012 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
2013 return SDValue();
2014
2015 // Check that we know which element is being inserted
2016 if (!isa<ConstantSDNode>(EltNo))
2017 return SDValue();
2018 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
2019
2020 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
2021 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
2022 // vector elements.
2023 SmallVector<SDValue, 8> Ops;
2024 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
2025 Ops.append(InVec.getNode()->op_begin(),
2026 InVec.getNode()->op_end());
Sanjay Patel57195842016-03-14 17:28:46 +00002027 } else if (InVec.isUndef()) {
Quentin Colombete2e05482013-07-30 00:27:16 +00002028 unsigned NElts = VT.getVectorNumElements();
2029 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
2030 } else {
2031 return SDValue();
2032 }
2033
2034 // Insert the element
2035 if (Elt < Ops.size()) {
2036 // All the operands of BUILD_VECTOR must have the same type;
2037 // we enforce that here.
2038 EVT OpVT = Ops[0].getValueType();
2039 if (InVal.getValueType() != OpVT)
2040 InVal = OpVT.bitsGT(InVal.getValueType()) ?
2041 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
2042 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
2043 Ops[Elt] = InVal;
2044 }
2045
2046 // Return the new vector
Ahmed Bougacha128f8732016-04-26 21:15:30 +00002047 return DAG.getBuildVector(VT, dl, Ops);
Quentin Colombete2e05482013-07-30 00:27:16 +00002048 }
2049
Tom Stellard365366f2013-01-23 02:09:06 +00002050 // Extract_vec (Build_vector) generated by custom lowering
2051 // also needs to be customly combined
2052 case ISD::EXTRACT_VECTOR_ELT: {
2053 SDValue Arg = N->getOperand(0);
2054 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
2055 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2056 unsigned Element = Const->getZExtValue();
2057 return Arg->getOperand(Element);
2058 }
2059 }
Tom Stellarddd04c832013-01-31 22:11:53 +00002060 if (Arg.getOpcode() == ISD::BITCAST &&
2061 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
2062 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2063 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00002064 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00002065 Arg->getOperand(0).getOperand(Element));
2066 }
2067 }
Mehdi Aminie029eae2015-07-16 06:23:12 +00002068 break;
Tom Stellard365366f2013-01-23 02:09:06 +00002069 }
Tom Stellarde06163a2013-02-07 14:02:35 +00002070
2071 case ISD::SELECT_CC: {
Tom Stellardafa8b532014-05-09 16:42:16 +00002072 // Try common optimizations
Ahmed Bougachaf8dfb472016-02-09 22:54:12 +00002073 if (SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI))
Tom Stellardafa8b532014-05-09 16:42:16 +00002074 return Ret;
2075
Tom Stellarde06163a2013-02-07 14:02:35 +00002076 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
2077 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00002078 //
2079 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
2080 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00002081 SDValue LHS = N->getOperand(0);
2082 if (LHS.getOpcode() != ISD::SELECT_CC) {
2083 return SDValue();
2084 }
2085
2086 SDValue RHS = N->getOperand(1);
2087 SDValue True = N->getOperand(2);
2088 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00002089 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00002090
2091 if (LHS.getOperand(2).getNode() != True.getNode() ||
2092 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00002093 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00002094 return SDValue();
2095 }
2096
Tom Stellard5e524892013-03-08 15:37:11 +00002097 switch (NCC) {
2098 default: return SDValue();
2099 case ISD::SETNE: return LHS;
2100 case ISD::SETEQ: {
2101 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
2102 LHSCC = ISD::getSetCCInverse(LHSCC,
2103 LHS.getOperand(0).getValueType().isInteger());
Tom Stellardcd428182013-09-28 02:50:38 +00002104 if (DCI.isBeforeLegalizeOps() ||
2105 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
2106 return DAG.getSelectCC(SDLoc(N),
2107 LHS.getOperand(0),
2108 LHS.getOperand(1),
2109 LHS.getOperand(2),
2110 LHS.getOperand(3),
2111 LHSCC);
2112 break;
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002113 }
Tom Stellard5e524892013-03-08 15:37:11 +00002114 }
Tom Stellardcd428182013-09-28 02:50:38 +00002115 return SDValue();
Tom Stellard5e524892013-03-08 15:37:11 +00002116 }
Tom Stellardfbab8272013-08-16 01:12:11 +00002117
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002118 case AMDGPUISD::EXPORT: {
2119 SDValue Arg = N->getOperand(1);
2120 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2121 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002122
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002123 SDValue NewArgs[8] = {
2124 N->getOperand(0), // Chain
2125 SDValue(),
2126 N->getOperand(2), // ArrayBase
2127 N->getOperand(3), // Type
2128 N->getOperand(4), // SWZ_X
2129 N->getOperand(5), // SWZ_Y
2130 N->getOperand(6), // SWZ_Z
2131 N->getOperand(7) // SWZ_W
2132 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00002133 SDLoc DL(N);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002134 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
Craig Topper48d114b2014-04-26 18:35:24 +00002135 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
Tom Stellarde06163a2013-02-07 14:02:35 +00002136 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002137 case AMDGPUISD::TEXTURE_FETCH: {
2138 SDValue Arg = N->getOperand(1);
2139 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2140 break;
2141
2142 SDValue NewArgs[19] = {
2143 N->getOperand(0),
2144 N->getOperand(1),
2145 N->getOperand(2),
2146 N->getOperand(3),
2147 N->getOperand(4),
2148 N->getOperand(5),
2149 N->getOperand(6),
2150 N->getOperand(7),
2151 N->getOperand(8),
2152 N->getOperand(9),
2153 N->getOperand(10),
2154 N->getOperand(11),
2155 N->getOperand(12),
2156 N->getOperand(13),
2157 N->getOperand(14),
2158 N->getOperand(15),
2159 N->getOperand(16),
2160 N->getOperand(17),
2161 N->getOperand(18),
2162 };
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002163 SDLoc DL(N);
2164 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
2165 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002166 }
Tom Stellard75aadc22012-12-11 21:25:42 +00002167 }
Matt Arsenault5565f65e2014-05-22 18:09:07 +00002168
2169 return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00002170}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002171
2172static bool
2173FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002174 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Eric Christopherfc6de422014-08-05 02:39:49 +00002175 const R600InstrInfo *TII =
2176 static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002177 if (!Src.isMachineOpcode())
2178 return false;
2179 switch (Src.getMachineOpcode()) {
2180 case AMDGPU::FNEG_R600:
2181 if (!Neg.getNode())
2182 return false;
2183 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002184 Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002185 return true;
2186 case AMDGPU::FABS_R600:
2187 if (!Abs.getNode())
2188 return false;
2189 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002190 Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002191 return true;
2192 case AMDGPU::CONST_COPY: {
2193 unsigned Opcode = ParentNode->getMachineOpcode();
2194 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2195
2196 if (!Sel.getNode())
2197 return false;
2198
2199 SDValue CstOffset = Src.getOperand(0);
2200 if (ParentNode->getValueType(0).isVector())
2201 return false;
2202
2203 // Gather constants values
2204 int SrcIndices[] = {
2205 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2206 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2207 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
2208 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2209 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2210 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2211 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2212 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2213 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2214 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2215 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
2216 };
2217 std::vector<unsigned> Consts;
Matt Arsenault4d64f962014-05-12 19:23:21 +00002218 for (int OtherSrcIdx : SrcIndices) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002219 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2220 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2221 continue;
2222 if (HasDst) {
2223 OtherSrcIdx--;
2224 OtherSelIdx--;
2225 }
2226 if (RegisterSDNode *Reg =
2227 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2228 if (Reg->getReg() == AMDGPU::ALU_CONST) {
Matt Arsenaultb3ee3882014-05-12 19:26:38 +00002229 ConstantSDNode *Cst
2230 = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002231 Consts.push_back(Cst->getZExtValue());
2232 }
2233 }
2234 }
2235
Matt Arsenault37c12d72014-05-12 20:42:57 +00002236 ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002237 Consts.push_back(Cst->getZExtValue());
2238 if (!TII->fitsConstReadLimitations(Consts)) {
2239 return false;
2240 }
2241
2242 Sel = CstOffset;
2243 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
2244 return true;
2245 }
Jan Vesely16800392016-05-13 20:39:31 +00002246 case AMDGPU::MOV_IMM_GLOBAL_ADDR:
2247 // Check if the Imm slot is used. Taken from below.
2248 if (cast<ConstantSDNode>(Imm)->getZExtValue())
2249 return false;
2250 Imm = Src.getOperand(0);
2251 Src = DAG.getRegister(AMDGPU::ALU_LITERAL_X, MVT::i32);
2252 return true;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002253 case AMDGPU::MOV_IMM_I32:
2254 case AMDGPU::MOV_IMM_F32: {
2255 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
2256 uint64_t ImmValue = 0;
2257
2258
2259 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
2260 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
2261 float FloatValue = FPC->getValueAPF().convertToFloat();
2262 if (FloatValue == 0.0) {
2263 ImmReg = AMDGPU::ZERO;
2264 } else if (FloatValue == 0.5) {
2265 ImmReg = AMDGPU::HALF;
2266 } else if (FloatValue == 1.0) {
2267 ImmReg = AMDGPU::ONE;
2268 } else {
2269 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2270 }
2271 } else {
2272 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
2273 uint64_t Value = C->getZExtValue();
2274 if (Value == 0) {
2275 ImmReg = AMDGPU::ZERO;
2276 } else if (Value == 1) {
2277 ImmReg = AMDGPU::ONE_INT;
2278 } else {
2279 ImmValue = Value;
2280 }
2281 }
2282
2283 // Check that we aren't already using an immediate.
2284 // XXX: It's possible for an instruction to have more than one
2285 // immediate operand, but this is not supported yet.
2286 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
2287 if (!Imm.getNode())
2288 return false;
2289 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
2290 assert(C);
2291 if (C->getZExtValue())
2292 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002293 Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002294 }
2295 Src = DAG.getRegister(ImmReg, MVT::i32);
2296 return true;
2297 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002298 default:
2299 return false;
2300 }
2301}
2302
2303
2304/// \brief Fold the instructions after selecting them
2305SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2306 SelectionDAG &DAG) const {
Eric Christopherfc6de422014-08-05 02:39:49 +00002307 const R600InstrInfo *TII =
2308 static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002309 if (!Node->isMachineOpcode())
2310 return Node;
2311 unsigned Opcode = Node->getMachineOpcode();
2312 SDValue FakeOp;
2313
Benjamin Kramer6cd780f2015-02-17 15:29:18 +00002314 std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002315
2316 if (Opcode == AMDGPU::DOT_4) {
2317 int OperandIdx[] = {
2318 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2319 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2320 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2321 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2322 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2323 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2324 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2325 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00002326 };
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002327 int NegIdx[] = {
2328 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
2329 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
2330 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
2331 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
2332 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
2333 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
2334 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
2335 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
2336 };
2337 int AbsIdx[] = {
2338 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
2339 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
2340 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
2341 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
2342 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
2343 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
2344 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
2345 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
2346 };
2347 for (unsigned i = 0; i < 8; i++) {
2348 if (OperandIdx[i] < 0)
2349 return Node;
2350 SDValue &Src = Ops[OperandIdx[i] - 1];
2351 SDValue &Neg = Ops[NegIdx[i] - 1];
2352 SDValue &Abs = Ops[AbsIdx[i] - 1];
2353 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2354 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2355 if (HasDst)
2356 SelIdx--;
2357 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002358 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2359 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2360 }
2361 } else if (Opcode == AMDGPU::REG_SEQUENCE) {
2362 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2363 SDValue &Src = Ops[i];
2364 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002365 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2366 }
Vincent Lejeune0167a312013-09-12 23:45:00 +00002367 } else if (Opcode == AMDGPU::CLAMP_R600) {
2368 SDValue Src = Node->getOperand(0);
2369 if (!Src.isMachineOpcode() ||
2370 !TII->hasInstrModifiers(Src.getMachineOpcode()))
2371 return Node;
2372 int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
2373 AMDGPU::OpName::clamp);
2374 if (ClampIdx < 0)
2375 return Node;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002376 SDLoc DL(Node);
Benjamin Kramer6cd780f2015-02-17 15:29:18 +00002377 std::vector<SDValue> Ops(Src->op_begin(), Src->op_end());
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002378 Ops[ClampIdx - 1] = DAG.getTargetConstant(1, DL, MVT::i32);
2379 return DAG.getMachineNode(Src.getMachineOpcode(), DL,
2380 Node->getVTList(), Ops);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002381 } else {
2382 if (!TII->hasInstrModifiers(Opcode))
2383 return Node;
2384 int OperandIdx[] = {
2385 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2386 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2387 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
2388 };
2389 int NegIdx[] = {
2390 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
2391 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
2392 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
2393 };
2394 int AbsIdx[] = {
2395 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
2396 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
2397 -1
2398 };
2399 for (unsigned i = 0; i < 3; i++) {
2400 if (OperandIdx[i] < 0)
2401 return Node;
2402 SDValue &Src = Ops[OperandIdx[i] - 1];
2403 SDValue &Neg = Ops[NegIdx[i] - 1];
2404 SDValue FakeAbs;
2405 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2406 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2407 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002408 int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
2409 if (HasDst) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002410 SelIdx--;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002411 ImmIdx--;
2412 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002413 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002414 SDValue &Imm = Ops[ImmIdx];
2415 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002416 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2417 }
2418 }
2419
2420 return Node;
2421}