blob: 6405a82b3a802849d4e8741afee3c28059bba525 [file] [log] [blame]
//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
16#include "R600Defines.h"
17#include "R600InstrInfo.h"
18#include "R600MachineFunctionInfo.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000019#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000020#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000021#include "llvm/CodeGen/MachineInstrBuilder.h"
22#include "llvm/CodeGen/MachineRegisterInfo.h"
23#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000024#include "llvm/IR/Argument.h"
25#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000026
27using namespace llvm;
28
29R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000030 AMDGPUTargetLowering(TM),
31 Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000032 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
33 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
34 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
35 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000036 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
37 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
38
Tom Stellard75aadc22012-12-11 21:25:42 +000039 computeRegisterProperties();
40
Tom Stellard0351ea22013-09-28 02:50:50 +000041 // Set condition code actions
42 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
43 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000044 setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000045 setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000046 setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
47 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000048 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
49 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
50 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
51 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000052 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
53 setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
54
55 setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
56 setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
57 setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
58 setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
59
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000060 setOperationAction(ISD::FCOS, MVT::f32, Custom);
61 setOperationAction(ISD::FSIN, MVT::f32, Custom);
62
Tom Stellard75aadc22012-12-11 21:25:42 +000063 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000064 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000065
Tom Stellard492ebea2013-03-08 15:37:07 +000066 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
67 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000068
69 setOperationAction(ISD::FSUB, MVT::f32, Expand);
70
71 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
72 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
73 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000074
Tom Stellard75aadc22012-12-11 21:25:42 +000075 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
76 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
77
Tom Stellarde8f9f282013-03-08 15:37:05 +000078 setOperationAction(ISD::SETCC, MVT::i32, Expand);
79 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000080 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
81
Tom Stellard53f2f902013-09-05 18:38:03 +000082 setOperationAction(ISD::SELECT, MVT::i32, Expand);
83 setOperationAction(ISD::SELECT, MVT::f32, Expand);
84 setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
85 setOperationAction(ISD::SELECT, MVT::v2f32, Expand);
86 setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
87 setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000088
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000089 // Legalize loads and stores to the private address space.
90 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +000091 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000092 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +000093
94 // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
95 // spaces, so it is custom lowered to handle those where it isn't.
Tom Stellard1e803092013-07-23 01:48:18 +000096 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
97 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
98 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
99 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000100 setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
101 setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
102
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000103 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000104 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000105 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000106 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
Tom Stellardd3ee8c12013-08-16 01:12:06 +0000107 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
108 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000109
Tom Stellard365366f2013-01-23 02:09:06 +0000110 setOperationAction(ISD::LOAD, MVT::i32, Custom);
111 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000112 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
113
Tom Stellard75aadc22012-12-11 21:25:42 +0000114 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +0000115 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +0000116 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +0000117 setTargetDAGCombine(ISD::SELECT_CC);
Quentin Colombete2e05482013-07-30 00:27:16 +0000118 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000119
Michel Danzer49812b52013-07-10 16:37:07 +0000120 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
121
Tom Stellardb852af52013-03-08 15:37:03 +0000122 setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard87047f62013-04-24 23:56:18 +0000123 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
Tom Stellardfc455472013-08-12 22:33:21 +0000124 setSchedulingPreference(Sched::Source);
Tom Stellard75aadc22012-12-11 21:25:42 +0000125}
126
/// Expand pseudo instructions marked with usesCustomInserter into real R600
/// machine instructions.  Each case either builds replacement instruction(s)
/// in front of \p MI and falls through to the bottom of the switch (which
/// erases \p MI), or returns \p BB early to leave \p MI in place.
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
          static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // Replace LDS_*_RET instructions that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI->getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // The destination register is read somewhere: keep the _RET form.
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()))
        return BB;

      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      // Copy every operand except the (dead) destination at index 0.
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      // Not an LDS pseudo: defer to the common AMDGPU expansion.
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  // CLAMP/FABS/FNEG pseudos become a MOV carrying the matching
  // instruction-modifier flag.
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    // Mark the defining instruction of the masked register instead of
    // emitting anything for the MASK_WRITE itself.
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    // Materialize the float immediate via its raw bit pattern.
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    // A constant-buffer read becomes a MOV from ALU_CONST with the constant
    // selector encoded in the src0_sel operand.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
        MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    // If the next instruction is a RETURN, fold the end-of-program bit into
    // this write.
    unsigned EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    // Texture sample with explicit derivatives: load the H and V gradients
    // into temporary 128-bit registers, then issue the gradient sample.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    // Adjust the source swizzle (Src*) and the per-coordinate CT* immediates
    // depending on the texture type.
    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    // The gradient sample implicitly reads both gradient registers.
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    // Same expansion as TXD above, but ends in the shadow-comparison sample
    // (TEX_SAMPLE_C_G) instead of TEX_SAMPLE_G.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    // Unconditional branch lowers to a plain JUMP.
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    // Set the predicate bit from the f32 condition, then branch on it.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    // Same as the f32 form, but uses the integer is-not-zero compare.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO_INT)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if its not the last one of its type.
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    // Scan forward for a later export of the same type in this block.
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // CF instruction encoding differs between EG and R600 exports.
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  // Every case that fell through has fully replaced MI.
  MI->eraseFromParent();
  return BB;
}
503
//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//
507
/// Custom-lower the operations registered as Custom in the constructor.
/// Anything not handled here is forwarded to AMDGPUTargetLowering.
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::FCOS:
  case ISD::FSIN: return LowerTrig(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: return LowerLOAD(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      // Copy the value into the output register and remember it as a
      // live-out (consumed by the RETURN expansion above).
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MFI->LiveOuts.push_back(Reg);
      return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
    }
    case AMDGPUIntrinsic::R600_store_swizzle: {
      // Build an EXPORT node with the identity swizzle (X,Y,Z,W).
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, MVT::i32), // SWZ_X
        DAG.getConstant(1, MVT::i32), // SWZ_Y
        DAG.getConstant(2, MVT::i32), // SWZ_Z
        DAG.getConstant(3, MVT::i32) // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(),
          Args, 8);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch(IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      // Shader inputs arrive pre-loaded in T registers; mark the register
      // live-in and copy it out of the entry node.
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      MRI.addLiveIn(Reg);
      return DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), Reg, VT);
    }

    case AMDGPUIntrinsic::R600_interp_input: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
      MachineSDNode *interp;
      // Negative ijb: constant (flat) interpolation — load the parameter
      // vector directly and extract the channel for this slot.
      if (ijb < 0) {
        const MachineFunction &MF = DAG.getMachineFunction();
        const R600InstrInfo *TII =
            static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
        interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
            MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
        return DAG.getTargetExtractSubreg(
            TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
            DL, MVT::f32, SDValue(interp, 0));
      }
      // Otherwise read the I/J barycentric registers (live-in pair at
      // 2*ijb and 2*ijb+1) and emit the paired interpolation node.
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
      unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
      MRI.addLiveIn(RegisterI);
      MRI.addLiveIn(RegisterJ);
      SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
      SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);

      // XY pairs come from INTERP_PAIR_XY, ZW pairs from INTERP_PAIR_ZW;
      // slot % 2 selects which of the two results to return.
      if (slot % 4 < 2)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            RegisterJNode, RegisterINode);
      return SDValue(interp, slot % 2);
    }
    case AMDGPUIntrinsic::R600_interp_xy:
    case AMDGPUIntrinsic::R600_interp_zw: {
      // Like R600_interp_input, but the I/J values are explicit operands and
      // both interpolated channels are returned as a v2f32.
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      MachineSDNode *interp;
      SDValue RegisterINode = Op.getOperand(2);
      SDValue RegisterJNode = Op.getOperand(3);

      if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
            RegisterJNode, RegisterINode);
      return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
          SDValue(interp, 0), SDValue(interp, 1));
    }
    case AMDGPUIntrinsic::R600_tex:
    case AMDGPUIntrinsic::R600_texc:
    case AMDGPUIntrinsic::R600_txl:
    case AMDGPUIntrinsic::R600_txlc:
    case AMDGPUIntrinsic::R600_txb:
    case AMDGPUIntrinsic::R600_txbc:
    case AMDGPUIntrinsic::R600_txf:
    case AMDGPUIntrinsic::R600_txq:
    case AMDGPUIntrinsic::R600_ddx:
    case AMDGPUIntrinsic::R600_ddy:
    case AMDGPUIntrinsic::R600_ldptr: {
      // Map each texture intrinsic onto the TEXTURE_FETCH operation code.
      unsigned TextureOp;
      switch (IntrinsicID) {
      case AMDGPUIntrinsic::R600_tex:
        TextureOp = 0;
        break;
      case AMDGPUIntrinsic::R600_texc:
        TextureOp = 1;
        break;
      case AMDGPUIntrinsic::R600_txl:
        TextureOp = 2;
        break;
      case AMDGPUIntrinsic::R600_txlc:
        TextureOp = 3;
        break;
      case AMDGPUIntrinsic::R600_txb:
        TextureOp = 4;
        break;
      case AMDGPUIntrinsic::R600_txbc:
        TextureOp = 5;
        break;
      case AMDGPUIntrinsic::R600_txf:
        TextureOp = 6;
        break;
      case AMDGPUIntrinsic::R600_txq:
        TextureOp = 7;
        break;
      case AMDGPUIntrinsic::R600_ddx:
        TextureOp = 8;
        break;
      case AMDGPUIntrinsic::R600_ddy:
        TextureOp = 9;
        break;
      case AMDGPUIntrinsic::R600_ldptr:
        TextureOp = 10;
        break;
      default:
        llvm_unreachable("Unknow Texture Operation");
      }

      // TEXTURE_FETCH operand layout: op code, coordinate, two identity
      // swizzle groups (0..3), then the remaining intrinsic operands
      // (resource/sampler ids and coordinate-type flags).
      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
    }
    case AMDGPUIntrinsic::AMDGPU_dp4: {
      // Expand the 4-component dot product into a DOT4 node over the eight
      // interleaved scalar elements of the two vector operands.
      SDValue Args[8] = {
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(0, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(0, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(1, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(1, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(2, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(2, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(3, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(3, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8);
    }

    // Grid/group-size queries read implicit kernel parameters by slot index.
    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    // Group ids live in T1.{X,Y,Z}; thread ids in T0.{X,Y,Z}.
    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}
760
761void R600TargetLowering::ReplaceNodeResults(SDNode *N,
762 SmallVectorImpl<SDValue> &Results,
763 SelectionDAG &DAG) const {
764 switch (N->getOpcode()) {
Matt Arsenaultd125d742014-03-27 17:23:24 +0000765 default:
766 AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
767 return;
Tom Stellard75aadc22012-12-11 21:25:42 +0000768 case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
Tom Stellard365366f2013-01-23 02:09:06 +0000769 return;
770 case ISD::LOAD: {
771 SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
772 Results.push_back(SDValue(Node, 0));
773 Results.push_back(SDValue(Node, 1));
774 // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
775 // function
776 DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
777 return;
778 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000779 case ISD::STORE:
780 SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
781 Results.push_back(SDValue(Node, 0));
782 return;
Tom Stellard75aadc22012-12-11 21:25:42 +0000783 }
784}
785
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000786SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
787 // On hw >= R700, COS/SIN input must be between -1. and 1.
788 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
789 EVT VT = Op.getValueType();
790 SDValue Arg = Op.getOperand(0);
791 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
792 DAG.getNode(ISD::FADD, SDLoc(Op), VT,
793 DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
794 DAG.getConstantFP(0.15915494309, MVT::f32)),
795 DAG.getConstantFP(0.5, MVT::f32)));
796 unsigned TrigNode;
797 switch (Op.getOpcode()) {
798 case ISD::FCOS:
799 TrigNode = AMDGPUISD::COS_HW;
800 break;
801 case ISD::FSIN:
802 TrigNode = AMDGPUISD::SIN_HW;
803 break;
804 default:
805 llvm_unreachable("Wrong trig opcode");
806 }
807 SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
808 DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
809 DAG.getConstantFP(-0.5, MVT::f32)));
810 if (Gen >= AMDGPUSubtarget::R700)
811 return TrigVal;
812 // On R600 hw, COS/SIN input must be between -Pi and Pi.
813 return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
814 DAG.getConstantFP(3.14159265359, MVT::f32));
815}
816
Tom Stellard75aadc22012-12-11 21:25:42 +0000817SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
818 return DAG.getNode(
819 ISD::SETCC,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000820 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +0000821 MVT::i1,
822 Op, DAG.getConstantFP(0.0f, MVT::f32),
823 DAG.getCondCode(ISD::SETNE)
824 );
825}
826
Tom Stellard75aadc22012-12-11 21:25:42 +0000827SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000828 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +0000829 unsigned DwordOffset) const {
830 unsigned ByteOffset = DwordOffset * 4;
831 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +0000832 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +0000833
834 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
835 assert(isInt<16>(ByteOffset));
836
837 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
838 DAG.getConstant(ByteOffset, MVT::i32), // PTR
839 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
840 false, false, false, 0);
841}
842
Tom Stellard75aadc22012-12-11 21:25:42 +0000843bool R600TargetLowering::isZero(SDValue Op) const {
844 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
845 return Cst->isNullValue();
846 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
847 return CstFP->isZero();
848 } else {
849 return false;
850 }
851}
852
/// Lower SELECT_CC to either a native SET* / CND* instruction pattern, a
/// min/max, or (as a last resort) a pair of chained SELECT_CC nodes.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  // select_cc operands: (lhs, rhs, true-value, false-value, cond-code).
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  // If True/False are the hardware values but in the wrong slots, invert the
  // condition (or invert + swap LHS/RHS) so the pair becomes (HWTrue, HWFalse).
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
      ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        // Inverting the condition also swaps which value is selected, so the
        // True/False operands must be exchanged along with LHS/RHS.
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    // NE-style codes have no direct CND* equivalent: invert the condition and
    // swap the selected values instead.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }


  // Possible Min/Max pattern
  SDValue MinMax = LowerMinMax(Op, DAG);
  if (MinMax.getNode()) {
    return MinMax;
  }

  // If we make it this far, it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
994
Alp Tokercb402912014-01-24 17:20:08 +0000995/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000996/// convert these pointers to a register index. Each register holds
997/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
998/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
999/// for indirect addressing.
1000SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1001 unsigned StackWidth,
1002 SelectionDAG &DAG) const {
1003 unsigned SRLPad;
1004 switch(StackWidth) {
1005 case 1:
1006 SRLPad = 2;
1007 break;
1008 case 2:
1009 SRLPad = 3;
1010 break;
1011 case 4:
1012 SRLPad = 4;
1013 break;
1014 default: llvm_unreachable("Invalid stack width");
1015 }
1016
Andrew Trickef9de2a2013-05-25 02:42:55 +00001017 return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001018 DAG.getConstant(SRLPad, MVT::i32));
1019}
1020
1021void R600TargetLowering::getStackAddress(unsigned StackWidth,
1022 unsigned ElemIdx,
1023 unsigned &Channel,
1024 unsigned &PtrIncr) const {
1025 switch (StackWidth) {
1026 default:
1027 case 1:
1028 Channel = 0;
1029 if (ElemIdx > 0) {
1030 PtrIncr = 1;
1031 } else {
1032 PtrIncr = 0;
1033 }
1034 break;
1035 case 2:
1036 Channel = ElemIdx % 2;
1037 if (ElemIdx == 2) {
1038 PtrIncr = 1;
1039 } else {
1040 PtrIncr = 0;
1041 }
1042 break;
1043 case 4:
1044 Channel = ElemIdx;
1045 PtrIncr = 0;
1046 break;
1047 }
1048}
1049
Tom Stellard75aadc22012-12-11 21:25:42 +00001050SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001051 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001052 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1053 SDValue Chain = Op.getOperand(0);
1054 SDValue Value = Op.getOperand(1);
1055 SDValue Ptr = Op.getOperand(2);
1056
Tom Stellard2ffc3302013-08-26 15:05:44 +00001057 SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
Tom Stellardfbab8272013-08-16 01:12:11 +00001058 if (Result.getNode()) {
1059 return Result;
1060 }
1061
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001062 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
1063 if (StoreNode->isTruncatingStore()) {
1064 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001065 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001066 EVT MemVT = StoreNode->getMemoryVT();
1067 SDValue MaskConstant;
1068 if (MemVT == MVT::i8) {
1069 MaskConstant = DAG.getConstant(0xFF, MVT::i32);
1070 } else {
1071 assert(MemVT == MVT::i16);
1072 MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
1073 }
1074 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
1075 DAG.getConstant(2, MVT::i32));
1076 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
1077 DAG.getConstant(0x00000003, VT));
1078 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1079 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
1080 DAG.getConstant(3, VT));
1081 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1082 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
1083 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1084 // vector instead.
1085 SDValue Src[4] = {
1086 ShiftedValue,
1087 DAG.getConstant(0, MVT::i32),
1088 DAG.getConstant(0, MVT::i32),
1089 Mask
1090 };
1091 SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src, 4);
1092 SDValue Args[3] = { Chain, Input, DWordAddr };
1093 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
1094 Op->getVTList(), Args, 3, MemVT,
1095 StoreNode->getMemOperand());
1096 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
1097 Value.getValueType().bitsGE(MVT::i32)) {
1098 // Convert pointer from byte address to dword address.
1099 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1100 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
1101 Ptr, DAG.getConstant(2, MVT::i32)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001102
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001103 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
Matt Arsenaulteaa3a7e2013-12-10 21:37:42 +00001104 llvm_unreachable("Truncated and indexed stores not supported yet");
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001105 } else {
1106 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1107 }
1108 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001109 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001110 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001111
1112 EVT ValueVT = Value.getValueType();
1113
1114 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1115 return SDValue();
1116 }
1117
Tom Stellarde9373602014-01-22 19:24:14 +00001118 SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
1119 if (Ret.getNode()) {
1120 return Ret;
1121 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001122 // Lowering for indirect addressing
1123
1124 const MachineFunction &MF = DAG.getMachineFunction();
1125 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1126 getTargetMachine().getFrameLowering());
1127 unsigned StackWidth = TFL->getStackWidth(MF);
1128
1129 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1130
1131 if (ValueVT.isVector()) {
1132 unsigned NumElemVT = ValueVT.getVectorNumElements();
1133 EVT ElemVT = ValueVT.getVectorElementType();
1134 SDValue Stores[4];
1135
1136 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1137 "vector width in load");
1138
1139 for (unsigned i = 0; i < NumElemVT; ++i) {
1140 unsigned Channel, PtrIncr;
1141 getStackAddress(StackWidth, i, Channel, PtrIncr);
1142 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1143 DAG.getConstant(PtrIncr, MVT::i32));
1144 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
1145 Value, DAG.getConstant(i, MVT::i32));
1146
1147 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1148 Chain, Elem, Ptr,
1149 DAG.getTargetConstant(Channel, MVT::i32));
1150 }
1151 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
1152 } else {
1153 if (ValueVT == MVT::i8) {
1154 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1155 }
1156 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
NAKAMURA Takumi18ca09c2013-05-22 06:37:25 +00001157 DAG.getTargetConstant(0, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001158 }
1159
1160 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001161}
1162
Tom Stellard365366f2013-01-23 02:09:06 +00001163// return (512 + (kc_bank << 12)
1164static int
1165ConstantAddressBlock(unsigned AddressSpace) {
1166 switch (AddressSpace) {
1167 case AMDGPUAS::CONSTANT_BUFFER_0:
1168 return 512;
1169 case AMDGPUAS::CONSTANT_BUFFER_1:
1170 return 512 + 4096;
1171 case AMDGPUAS::CONSTANT_BUFFER_2:
1172 return 512 + 4096 * 2;
1173 case AMDGPUAS::CONSTANT_BUFFER_3:
1174 return 512 + 4096 * 3;
1175 case AMDGPUAS::CONSTANT_BUFFER_4:
1176 return 512 + 4096 * 4;
1177 case AMDGPUAS::CONSTANT_BUFFER_5:
1178 return 512 + 4096 * 5;
1179 case AMDGPUAS::CONSTANT_BUFFER_6:
1180 return 512 + 4096 * 6;
1181 case AMDGPUAS::CONSTANT_BUFFER_7:
1182 return 512 + 4096 * 7;
1183 case AMDGPUAS::CONSTANT_BUFFER_8:
1184 return 512 + 4096 * 8;
1185 case AMDGPUAS::CONSTANT_BUFFER_9:
1186 return 512 + 4096 * 9;
1187 case AMDGPUAS::CONSTANT_BUFFER_10:
1188 return 512 + 4096 * 10;
1189 case AMDGPUAS::CONSTANT_BUFFER_11:
1190 return 512 + 4096 * 11;
1191 case AMDGPUAS::CONSTANT_BUFFER_12:
1192 return 512 + 4096 * 12;
1193 case AMDGPUAS::CONSTANT_BUFFER_13:
1194 return 512 + 4096 * 13;
1195 case AMDGPUAS::CONSTANT_BUFFER_14:
1196 return 512 + 4096 * 14;
1197 case AMDGPUAS::CONSTANT_BUFFER_15:
1198 return 512 + 4096 * 15;
1199 default:
1200 return -1;
1201 }
1202}
1203
/// Custom lowering for LOAD nodes: folds constant-buffer reads into
/// CONST_ADDRESS nodes, expands SEXT loads, and lowers private (stack) loads
/// to REGISTER_LOAD via indirect addressing.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  // Let the generic AMDGPU lowering handle the load first.
  SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
  if (Ret.getNode()) {
    SDValue Ops[2];
    Ops[0] = Ret;
    Ops[1] = Chain;
    return DAG.getMergeValues(Ops, 2, DL);
  }


  // Vector loads from local memory are split into scalar pieces.
  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      SplitVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  // Constant-buffer loads (non-extending or zero-extending only) are folded
  // into CONST_ADDRESS reads of the kc bank.
  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<ConstantExpr>(LoadNode->getSrcValue()) ||
        isa<Constant>(LoadNode->getSrcValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      // Scalar results are built as a v4i32 and reduced below; vector results
      // keep their own type and element count.
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT, Slots, NumElements);
    } else {
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    // For a scalar load, extract element 0 of the folded vector.
    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
          DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    // Expand as: zext/any-ext load, then shift left and arithmetic-shift
    // right to replicate the sign bit.
    SDValue ShiftAmount =
        DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                  LoadNode->getPointerInfo(), MemVT,
                                  LoadNode->isVolatile(),
                                  LoadNode->isNonTemporal(),
                                  LoadNode->getAlignment());
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);

    SDValue MergedValues[2] = { Sra, Chain };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  // Only private (stack) loads get the indirect-addressing treatment below.
  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    // One REGISTER_LOAD per element, advancing the register index as dictated
    // by the stack width.
    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, MVT::i32),
                             Op.getOperand(2));
    }
    // Pad the remaining lanes with undef to form a full 4-wide vector.
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2];
  Ops[0] = LoweredLoad;
  Ops[1] = Chain;

  return DAG.getMergeValues(Ops, 2, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001349
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      SDLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned ShaderType = MF.getInfo<R600MachineFunctionInfo>()->ShaderType;

  // Analyze argument locations against the IR-level argument types rather
  // than the (possibly split/promoted) ISel types.
  SmallVector<ISD::InputArg, 8> LocalIns;

  getOriginalFunctionArgs(DAG, DAG.getMachineFunction().getFunction(), Ins,
                          LocalIns);

  AnalyzeFormalArguments(CCInfo, LocalIns);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    EVT VT = Ins[i].VT;
    EVT MemVT = LocalIns[i].VT;

    // Non-compute (graphics) shaders receive their inputs in live-in vector
    // registers instead of the kernel argument buffer.
    if (ShaderType != ShaderType::COMPUTE) {
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    // Compute shaders: arguments are loaded from constant buffer 0.
    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
    // being invalid. Somehow this seems to work with i64 arguments, but breaks
    // for <1 x i64>.

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.
    SDValue Arg = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, Chain,
                           DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
                                 MachinePointerInfo(UndefValue::get(PtrTy)),
                                 MemVT, false, false, 4);
    // 4 is the preferred alignment for
    // the CONSTANT memory space.
    InVals.push_back(Arg);
  }
  return Chain;
}
1405
Matt Arsenault758659232013-05-18 00:21:46 +00001406EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Tom Stellard75aadc22012-12-11 21:25:42 +00001407 if (!VT.isVector()) return MVT::i32;
1408 return VT.changeVectorElementTypeToInteger();
1409}
1410
/// Rewrite a BUILD_VECTOR so that elements that can be encoded directly in a
/// swizzle (undef, 0.0, 1.0, or a duplicate of an earlier lane) are replaced
/// by undef, recording the old-lane -> swizzle-selector mapping in
/// \p RemapSwizzle (4 = SEL_0, 5 = SEL_1, 7 = SEL_MASK_WRITE).
static SDValue
CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
                        DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      // We mask write here to teach later passes that the ith element of this
      // vector is undef. Thus we can use it to reduce 128 bits reg usage,
      // break false dependencies and additionally make assembly easier to read.
      RemapSwizzle[i] = 7; // SEL_MASK_WRITE
    // Constant 0.0 / 1.0 lanes can be sourced from the hardware's SEL_0 /
    // SEL_1 selectors instead of occupying a register lane.
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      continue;
    // Deduplicate: a lane equal to an earlier lane is remapped to read from
    // that earlier lane's channel.
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
      VectorEntry.getValueType(), NewBldVec, 4);
}
1453
Benjamin Kramer193960c2013-06-11 13:32:25 +00001454static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1455 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001456 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1457 assert(RemapSwizzle.empty());
1458 SDValue NewBldVec[4] = {
1459 VectorEntry.getOperand(0),
1460 VectorEntry.getOperand(1),
1461 VectorEntry.getOperand(2),
1462 VectorEntry.getOperand(3)
1463 };
1464 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001465 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001466 RemapSwizzle[i] = i;
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001467 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1468 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1469 ->getZExtValue();
1470 if (i == Idx)
1471 isUnmovable[Idx] = true;
1472 }
1473 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001474
1475 for (unsigned i = 0; i < 4; i++) {
1476 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1477 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1478 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001479 if (isUnmovable[Idx])
1480 continue;
1481 // Swap i and Idx
1482 std::swap(NewBldVec[Idx], NewBldVec[i]);
1483 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1484 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001485 }
1486 }
1487
1488 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
1489 VectorEntry.getValueType(), NewBldVec, 4);
1490}
1491
1492
1493SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1494SDValue Swz[4], SelectionDAG &DAG) const {
1495 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1496 // Old -> New swizzle values
1497 DenseMap<unsigned, unsigned> SwizzleRemap;
1498
1499 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1500 for (unsigned i = 0; i < 4; i++) {
1501 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1502 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1503 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1504 }
1505
1506 SwizzleRemap.clear();
1507 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1508 for (unsigned i = 0; i < 4; i++) {
1509 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1510 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1511 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1512 }
1513
1514 return BuildVector;
1515}
1516
1517
Tom Stellard75aadc22012-12-11 21:25:42 +00001518//===----------------------------------------------------------------------===//
1519// Custom DAG Optimizations
1520//===----------------------------------------------------------------------===//
1521
1522SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1523 DAGCombinerInfo &DCI) const {
1524 SelectionDAG &DAG = DCI.DAG;
1525
1526 switch (N->getOpcode()) {
1527 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1528 case ISD::FP_ROUND: {
1529 SDValue Arg = N->getOperand(0);
1530 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001531 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001532 Arg.getOperand(0));
1533 }
1534 break;
1535 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001536
1537 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1538 // (i32 select_cc f32, f32, -1, 0 cc)
1539 //
1540 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1541 // this to one of the SET*_DX10 instructions.
1542 case ISD::FP_TO_SINT: {
1543 SDValue FNeg = N->getOperand(0);
1544 if (FNeg.getOpcode() != ISD::FNEG) {
1545 return SDValue();
1546 }
1547 SDValue SelectCC = FNeg.getOperand(0);
1548 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1549 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1550 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1551 !isHWTrueValue(SelectCC.getOperand(2)) ||
1552 !isHWFalseValue(SelectCC.getOperand(3))) {
1553 return SDValue();
1554 }
1555
Andrew Trickef9de2a2013-05-25 02:42:55 +00001556 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001557 SelectCC.getOperand(0), // LHS
1558 SelectCC.getOperand(1), // RHS
1559 DAG.getConstant(-1, MVT::i32), // True
1560 DAG.getConstant(0, MVT::i32), // Flase
1561 SelectCC.getOperand(4)); // CC
1562
1563 break;
1564 }
Quentin Colombete2e05482013-07-30 00:27:16 +00001565
NAKAMURA Takumi8a046432013-10-28 04:07:38 +00001566 // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1567 // => build_vector elt0, ... , NewEltIdx, ... , eltN
Quentin Colombete2e05482013-07-30 00:27:16 +00001568 case ISD::INSERT_VECTOR_ELT: {
1569 SDValue InVec = N->getOperand(0);
1570 SDValue InVal = N->getOperand(1);
1571 SDValue EltNo = N->getOperand(2);
1572 SDLoc dl(N);
1573
1574 // If the inserted element is an UNDEF, just use the input vector.
1575 if (InVal.getOpcode() == ISD::UNDEF)
1576 return InVec;
1577
1578 EVT VT = InVec.getValueType();
1579
1580 // If we can't generate a legal BUILD_VECTOR, exit
1581 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1582 return SDValue();
1583
1584 // Check that we know which element is being inserted
1585 if (!isa<ConstantSDNode>(EltNo))
1586 return SDValue();
1587 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1588
1589 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1590 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1591 // vector elements.
1592 SmallVector<SDValue, 8> Ops;
1593 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1594 Ops.append(InVec.getNode()->op_begin(),
1595 InVec.getNode()->op_end());
1596 } else if (InVec.getOpcode() == ISD::UNDEF) {
1597 unsigned NElts = VT.getVectorNumElements();
1598 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1599 } else {
1600 return SDValue();
1601 }
1602
1603 // Insert the element
1604 if (Elt < Ops.size()) {
1605 // All the operands of BUILD_VECTOR must have the same type;
1606 // we enforce that here.
1607 EVT OpVT = Ops[0].getValueType();
1608 if (InVal.getValueType() != OpVT)
1609 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1610 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
1611 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
1612 Ops[Elt] = InVal;
1613 }
1614
1615 // Return the new vector
1616 return DAG.getNode(ISD::BUILD_VECTOR, dl,
1617 VT, &Ops[0], Ops.size());
1618 }
1619
Tom Stellard365366f2013-01-23 02:09:06 +00001620 // Extract_vec (Build_vector) generated by custom lowering
1621 // also needs to be customly combined
1622 case ISD::EXTRACT_VECTOR_ELT: {
1623 SDValue Arg = N->getOperand(0);
1624 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1625 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1626 unsigned Element = Const->getZExtValue();
1627 return Arg->getOperand(Element);
1628 }
1629 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001630 if (Arg.getOpcode() == ISD::BITCAST &&
1631 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1632 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1633 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001634 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00001635 Arg->getOperand(0).getOperand(Element));
1636 }
1637 }
Tom Stellard365366f2013-01-23 02:09:06 +00001638 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001639
1640 case ISD::SELECT_CC: {
1641 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1642 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001643 //
1644 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1645 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001646 SDValue LHS = N->getOperand(0);
1647 if (LHS.getOpcode() != ISD::SELECT_CC) {
1648 return SDValue();
1649 }
1650
1651 SDValue RHS = N->getOperand(1);
1652 SDValue True = N->getOperand(2);
1653 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001654 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001655
1656 if (LHS.getOperand(2).getNode() != True.getNode() ||
1657 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00001658 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00001659 return SDValue();
1660 }
1661
Tom Stellard5e524892013-03-08 15:37:11 +00001662 switch (NCC) {
1663 default: return SDValue();
1664 case ISD::SETNE: return LHS;
1665 case ISD::SETEQ: {
1666 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1667 LHSCC = ISD::getSetCCInverse(LHSCC,
1668 LHS.getOperand(0).getValueType().isInteger());
Tom Stellardcd428182013-09-28 02:50:38 +00001669 if (DCI.isBeforeLegalizeOps() ||
1670 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1671 return DAG.getSelectCC(SDLoc(N),
1672 LHS.getOperand(0),
1673 LHS.getOperand(1),
1674 LHS.getOperand(2),
1675 LHS.getOperand(3),
1676 LHSCC);
1677 break;
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001678 }
Tom Stellard5e524892013-03-08 15:37:11 +00001679 }
Tom Stellardcd428182013-09-28 02:50:38 +00001680 return SDValue();
Tom Stellard5e524892013-03-08 15:37:11 +00001681 }
Tom Stellardfbab8272013-08-16 01:12:11 +00001682
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001683 case AMDGPUISD::EXPORT: {
1684 SDValue Arg = N->getOperand(1);
1685 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1686 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001687
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001688 SDValue NewArgs[8] = {
1689 N->getOperand(0), // Chain
1690 SDValue(),
1691 N->getOperand(2), // ArrayBase
1692 N->getOperand(3), // Type
1693 N->getOperand(4), // SWZ_X
1694 N->getOperand(5), // SWZ_Y
1695 N->getOperand(6), // SWZ_Z
1696 N->getOperand(7) // SWZ_W
1697 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00001698 SDLoc DL(N);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001699 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001700 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
Tom Stellarde06163a2013-02-07 14:02:35 +00001701 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001702 case AMDGPUISD::TEXTURE_FETCH: {
1703 SDValue Arg = N->getOperand(1);
1704 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1705 break;
1706
1707 SDValue NewArgs[19] = {
1708 N->getOperand(0),
1709 N->getOperand(1),
1710 N->getOperand(2),
1711 N->getOperand(3),
1712 N->getOperand(4),
1713 N->getOperand(5),
1714 N->getOperand(6),
1715 N->getOperand(7),
1716 N->getOperand(8),
1717 N->getOperand(9),
1718 N->getOperand(10),
1719 N->getOperand(11),
1720 N->getOperand(12),
1721 N->getOperand(13),
1722 N->getOperand(14),
1723 N->getOperand(15),
1724 N->getOperand(16),
1725 N->getOperand(17),
1726 N->getOperand(18),
1727 };
1728 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
1729 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
1730 NewArgs, 19);
1731 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001732 }
1733 return SDValue();
1734}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001735
1736static bool
1737FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001738 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001739 const R600InstrInfo *TII =
1740 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
1741 if (!Src.isMachineOpcode())
1742 return false;
1743 switch (Src.getMachineOpcode()) {
1744 case AMDGPU::FNEG_R600:
1745 if (!Neg.getNode())
1746 return false;
1747 Src = Src.getOperand(0);
1748 Neg = DAG.getTargetConstant(1, MVT::i32);
1749 return true;
1750 case AMDGPU::FABS_R600:
1751 if (!Abs.getNode())
1752 return false;
1753 Src = Src.getOperand(0);
1754 Abs = DAG.getTargetConstant(1, MVT::i32);
1755 return true;
1756 case AMDGPU::CONST_COPY: {
1757 unsigned Opcode = ParentNode->getMachineOpcode();
1758 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1759
1760 if (!Sel.getNode())
1761 return false;
1762
1763 SDValue CstOffset = Src.getOperand(0);
1764 if (ParentNode->getValueType(0).isVector())
1765 return false;
1766
1767 // Gather constants values
1768 int SrcIndices[] = {
1769 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
1770 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
1771 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
1772 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1773 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1774 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1775 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
1776 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
1777 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
1778 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
1779 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
1780 };
1781 std::vector<unsigned> Consts;
1782 for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
1783 int OtherSrcIdx = SrcIndices[i];
1784 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
1785 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
1786 continue;
1787 if (HasDst) {
1788 OtherSrcIdx--;
1789 OtherSelIdx--;
1790 }
1791 if (RegisterSDNode *Reg =
1792 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
1793 if (Reg->getReg() == AMDGPU::ALU_CONST) {
1794 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(
1795 ParentNode->getOperand(OtherSelIdx));
1796 Consts.push_back(Cst->getZExtValue());
1797 }
1798 }
1799 }
1800
1801 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
1802 Consts.push_back(Cst->getZExtValue());
1803 if (!TII->fitsConstReadLimitations(Consts)) {
1804 return false;
1805 }
1806
1807 Sel = CstOffset;
1808 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
1809 return true;
1810 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001811 case AMDGPU::MOV_IMM_I32:
1812 case AMDGPU::MOV_IMM_F32: {
1813 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
1814 uint64_t ImmValue = 0;
1815
1816
1817 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
1818 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
1819 float FloatValue = FPC->getValueAPF().convertToFloat();
1820 if (FloatValue == 0.0) {
1821 ImmReg = AMDGPU::ZERO;
1822 } else if (FloatValue == 0.5) {
1823 ImmReg = AMDGPU::HALF;
1824 } else if (FloatValue == 1.0) {
1825 ImmReg = AMDGPU::ONE;
1826 } else {
1827 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
1828 }
1829 } else {
1830 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
1831 uint64_t Value = C->getZExtValue();
1832 if (Value == 0) {
1833 ImmReg = AMDGPU::ZERO;
1834 } else if (Value == 1) {
1835 ImmReg = AMDGPU::ONE_INT;
1836 } else {
1837 ImmValue = Value;
1838 }
1839 }
1840
1841 // Check that we aren't already using an immediate.
1842 // XXX: It's possible for an instruction to have more than one
1843 // immediate operand, but this is not supported yet.
1844 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
1845 if (!Imm.getNode())
1846 return false;
1847 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
1848 assert(C);
1849 if (C->getZExtValue())
1850 return false;
1851 Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
1852 }
1853 Src = DAG.getRegister(ImmReg, MVT::i32);
1854 return true;
1855 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001856 default:
1857 return false;
1858 }
1859}
1860
1861
1862/// \brief Fold the instructions after selecting them
1863SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
1864 SelectionDAG &DAG) const {
1865 const R600InstrInfo *TII =
1866 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
1867 if (!Node->isMachineOpcode())
1868 return Node;
1869 unsigned Opcode = Node->getMachineOpcode();
1870 SDValue FakeOp;
1871
1872 std::vector<SDValue> Ops;
1873 for(SDNode::op_iterator I = Node->op_begin(), E = Node->op_end();
1874 I != E; ++I)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00001875 Ops.push_back(*I);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001876
1877 if (Opcode == AMDGPU::DOT_4) {
1878 int OperandIdx[] = {
1879 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1880 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1881 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1882 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
1883 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
1884 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
1885 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
1886 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00001887 };
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001888 int NegIdx[] = {
1889 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
1890 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
1891 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
1892 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
1893 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
1894 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
1895 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
1896 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
1897 };
1898 int AbsIdx[] = {
1899 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
1900 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
1901 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
1902 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
1903 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
1904 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
1905 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
1906 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
1907 };
1908 for (unsigned i = 0; i < 8; i++) {
1909 if (OperandIdx[i] < 0)
1910 return Node;
1911 SDValue &Src = Ops[OperandIdx[i] - 1];
1912 SDValue &Neg = Ops[NegIdx[i] - 1];
1913 SDValue &Abs = Ops[AbsIdx[i] - 1];
1914 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1915 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
1916 if (HasDst)
1917 SelIdx--;
1918 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001919 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
1920 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1921 }
1922 } else if (Opcode == AMDGPU::REG_SEQUENCE) {
1923 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
1924 SDValue &Src = Ops[i];
1925 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001926 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1927 }
Vincent Lejeune0167a312013-09-12 23:45:00 +00001928 } else if (Opcode == AMDGPU::CLAMP_R600) {
1929 SDValue Src = Node->getOperand(0);
1930 if (!Src.isMachineOpcode() ||
1931 !TII->hasInstrModifiers(Src.getMachineOpcode()))
1932 return Node;
1933 int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
1934 AMDGPU::OpName::clamp);
1935 if (ClampIdx < 0)
1936 return Node;
1937 std::vector<SDValue> Ops;
1938 unsigned NumOp = Src.getNumOperands();
1939 for(unsigned i = 0; i < NumOp; ++i)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00001940 Ops.push_back(Src.getOperand(i));
Vincent Lejeune0167a312013-09-12 23:45:00 +00001941 Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
1942 return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
1943 Node->getVTList(), Ops);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001944 } else {
1945 if (!TII->hasInstrModifiers(Opcode))
1946 return Node;
1947 int OperandIdx[] = {
1948 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
1949 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
1950 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
1951 };
1952 int NegIdx[] = {
1953 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
1954 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
1955 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
1956 };
1957 int AbsIdx[] = {
1958 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
1959 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
1960 -1
1961 };
1962 for (unsigned i = 0; i < 3; i++) {
1963 if (OperandIdx[i] < 0)
1964 return Node;
1965 SDValue &Src = Ops[OperandIdx[i] - 1];
1966 SDValue &Neg = Ops[NegIdx[i] - 1];
1967 SDValue FakeAbs;
1968 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
1969 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1970 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001971 int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
1972 if (HasDst) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001973 SelIdx--;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001974 ImmIdx--;
1975 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001976 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001977 SDValue &Imm = Ops[ImmIdx];
1978 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001979 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1980 }
1981 }
1982
1983 return Node;
1984}