blob: 3d424c65035afa8b9756513d5f3bde296466b358 [file] [log] [blame]
Tom Stellard75aadc22012-12-11 21:25:42 +00001//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Custom DAG lowering for R600
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
16#include "R600Defines.h"
17#include "R600InstrInfo.h"
18#include "R600MachineFunctionInfo.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000019#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000020#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000021#include "llvm/CodeGen/MachineInstrBuilder.h"
22#include "llvm/CodeGen/MachineRegisterInfo.h"
23#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000024#include "llvm/IR/Argument.h"
25#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000026
27using namespace llvm;
28
29R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000030 AMDGPUTargetLowering(TM),
31 Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000032 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
33 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
34 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
35 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000036 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
37 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
38
Tom Stellard75aadc22012-12-11 21:25:42 +000039 computeRegisterProperties();
40
Tom Stellard0351ea22013-09-28 02:50:50 +000041 // Set condition code actions
42 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
43 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000044 setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000045 setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000046 setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
47 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000048 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
49 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
50 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
51 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000052 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
53 setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
54
55 setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
56 setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
57 setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
58 setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
59
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000060 setOperationAction(ISD::FCOS, MVT::f32, Custom);
61 setOperationAction(ISD::FSIN, MVT::f32, Custom);
62
Tom Stellard75aadc22012-12-11 21:25:42 +000063 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000064 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000065
Tom Stellard492ebea2013-03-08 15:37:07 +000066 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
67 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000068
69 setOperationAction(ISD::FSUB, MVT::f32, Expand);
70
71 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
72 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
73 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000074
Tom Stellard75aadc22012-12-11 21:25:42 +000075 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
76 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
77
Tom Stellarde8f9f282013-03-08 15:37:05 +000078 setOperationAction(ISD::SETCC, MVT::i32, Expand);
79 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000080 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
81
Tom Stellard53f2f902013-09-05 18:38:03 +000082 setOperationAction(ISD::SELECT, MVT::i32, Expand);
83 setOperationAction(ISD::SELECT, MVT::f32, Expand);
84 setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
85 setOperationAction(ISD::SELECT, MVT::v2f32, Expand);
86 setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
87 setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000088
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000089 // Legalize loads and stores to the private address space.
90 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +000091 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000092 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellard1e803092013-07-23 01:48:18 +000093 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
94 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
95 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
96 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000097 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000098 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +000099 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000100 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
Tom Stellardd3ee8c12013-08-16 01:12:06 +0000101 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
102 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000103
Tom Stellard365366f2013-01-23 02:09:06 +0000104 setOperationAction(ISD::LOAD, MVT::i32, Custom);
105 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000106 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
107
Tom Stellard75aadc22012-12-11 21:25:42 +0000108 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +0000109 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +0000110 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +0000111 setTargetDAGCombine(ISD::SELECT_CC);
Quentin Colombete2e05482013-07-30 00:27:16 +0000112 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000113
Michel Danzer49812b52013-07-10 16:37:07 +0000114 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
115
Tom Stellardb852af52013-03-08 15:37:03 +0000116 setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard87047f62013-04-24 23:56:18 +0000117 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
Tom Stellardfc455472013-08-12 22:33:21 +0000118 setSchedulingPreference(Sched::Source);
Tom Stellard75aadc22012-12-11 21:25:42 +0000119}
120
/// Expand pseudo-instructions that were marked usesCustomInserter into real
/// machine instructions after instruction selection.  Handles R600 pseudos
/// such as CLAMP/FABS/FNEG moves, immediate moves, texture-gradient sampling
/// (TXD/TXD_SHADOW), branches, exports and RETURN live-out bookkeeping.
/// Returns the (possibly unchanged) block that ends the expansion.
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
          static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // LDS instructions with a dst operand: if the result is actually used,
    // re-emit the instruction defining OQAP and copy OQAP into the original
    // destination register; if the result is dead, switch to the no-return
    // form of the LDS op.  Everything else falls through to the generic
    // AMDGPU inserter.
    if (TII->isLDSInstr(MI->getOpcode()) &&
        TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst) != -1) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg())) {
        NewMI = BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()),
                        AMDGPU::OQAP);
        TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV,
                                     MI->getOperand(0).getReg(),
                                     AMDGPU::OQAP);
      } else {
        NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                        TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      }
      // Copy the remaining (source) operands onto the rebuilt instruction.
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  case AMDGPU::CLAMP_R600: {
    // CLAMP becomes a MOV with the clamp output-modifier flag set.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                   AMDGPU::MOV,
                                                   MI->getOperand(0).getReg(),
                                                   MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    // FABS becomes a MOV with the absolute-value source modifier.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    // FNEG becomes a MOV with the negate source modifier.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    // Mark the instruction that defines the masked register so its write
    // is suppressed; MASK_WRITE itself is deleted at the end.
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    // Materialize an f32 immediate via its raw bit pattern.
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    // Read from the constant file: MOV from ALU_CONST with the constant
    // index encoded in the src0_sel operand.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
        MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    // The EOP (end-of-program) bit is set when the write is immediately
    // followed by the function's RETURN.
    unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    // Sample with explicit derivatives: load the H and V gradients into two
    // temporary 128-bit registers, then issue the gradient sample which
    // implicitly uses them.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    // Adjust the source swizzle and coordinate-type flags for the
    // texture target (rect/shadow/array variants).
    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    // Same as TXD but emits the shadow-comparison gradient sample
    // (TEX_SAMPLE_C_G).
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
              .addOperand(MI->getOperand(0));
      break;

  case AMDGPU::BRANCH_COND_f32: {
    // Conditional branch: set PREDICATE_BIT with a PRED_X compare against
    // zero, then emit a predicated JUMP_COND that kills the predicate.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    // Integer variant of the conditional branch above.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
            AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO_INT)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
           .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if its not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = llvm::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // CF_INST encoding differs between Evergreen (84) and R600 (40).
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  // The pseudo has been fully expanded; remove it.
  MI->eraseFromParent();
  return BB;
}
501
502//===----------------------------------------------------------------------===//
503// Custom DAG Lowering Operations
504//===----------------------------------------------------------------------===//
505
Tom Stellard75aadc22012-12-11 21:25:42 +0000506SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000507 MachineFunction &MF = DAG.getMachineFunction();
508 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard75aadc22012-12-11 21:25:42 +0000509 switch (Op.getOpcode()) {
510 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000511 case ISD::FCOS:
512 case ISD::FSIN: return LowerTrig(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000513 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000514 case ISD::STORE: return LowerSTORE(Op, DAG);
Tom Stellard365366f2013-01-23 02:09:06 +0000515 case ISD::LOAD: return LowerLOAD(Op, DAG);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000516 case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000517 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000518 case ISD::INTRINSIC_VOID: {
519 SDValue Chain = Op.getOperand(0);
520 unsigned IntrinsicID =
521 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
522 switch (IntrinsicID) {
523 case AMDGPUIntrinsic::AMDGPU_store_output: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000524 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
525 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000526 MFI->LiveOuts.push_back(Reg);
Andrew Trickef9de2a2013-05-25 02:42:55 +0000527 return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
Tom Stellard75aadc22012-12-11 21:25:42 +0000528 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000529 case AMDGPUIntrinsic::R600_store_swizzle: {
530 const SDValue Args[8] = {
531 Chain,
532 Op.getOperand(2), // Export Value
533 Op.getOperand(3), // ArrayBase
534 Op.getOperand(4), // Type
535 DAG.getConstant(0, MVT::i32), // SWZ_X
536 DAG.getConstant(1, MVT::i32), // SWZ_Y
537 DAG.getConstant(2, MVT::i32), // SWZ_Z
538 DAG.getConstant(3, MVT::i32) // SWZ_W
539 };
Andrew Trickef9de2a2013-05-25 02:42:55 +0000540 return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(),
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000541 Args, 8);
Tom Stellard75aadc22012-12-11 21:25:42 +0000542 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000543
Tom Stellard75aadc22012-12-11 21:25:42 +0000544 // default for switch(IntrinsicID)
545 default: break;
546 }
547 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
548 break;
549 }
550 case ISD::INTRINSIC_WO_CHAIN: {
551 unsigned IntrinsicID =
552 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
553 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000554 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000555 switch(IntrinsicID) {
556 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
557 case AMDGPUIntrinsic::R600_load_input: {
558 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
559 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Vincent Lejeuned3fcb502013-05-17 16:51:06 +0000560 MachineFunction &MF = DAG.getMachineFunction();
561 MachineRegisterInfo &MRI = MF.getRegInfo();
562 MRI.addLiveIn(Reg);
563 return DAG.getCopyFromReg(DAG.getEntryNode(),
Andrew Trickef9de2a2013-05-25 02:42:55 +0000564 SDLoc(DAG.getEntryNode()), Reg, VT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000565 }
Tom Stellard41afe6a2013-02-05 17:09:14 +0000566
567 case AMDGPUIntrinsic::R600_interp_input: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000568 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Tom Stellard41afe6a2013-02-05 17:09:14 +0000569 int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
570 MachineSDNode *interp;
571 if (ijb < 0) {
Bill Wendling37e9adb2013-06-07 20:28:55 +0000572 const MachineFunction &MF = DAG.getMachineFunction();
573 const R600InstrInfo *TII =
574 static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
Tom Stellard41afe6a2013-02-05 17:09:14 +0000575 interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
576 MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
577 return DAG.getTargetExtractSubreg(
578 TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
579 DL, MVT::f32, SDValue(interp, 0));
580 }
581
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000582 MachineFunction &MF = DAG.getMachineFunction();
583 MachineRegisterInfo &MRI = MF.getRegInfo();
584 unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
585 unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
586 MRI.addLiveIn(RegisterI);
587 MRI.addLiveIn(RegisterJ);
588 SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
589 SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
590 SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
591 SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);
592
Tom Stellard41afe6a2013-02-05 17:09:14 +0000593 if (slot % 4 < 2)
594 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
595 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000596 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000597 else
598 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
599 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000600 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000601 return SDValue(interp, slot % 2);
Tom Stellard75aadc22012-12-11 21:25:42 +0000602 }
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000603 case AMDGPUIntrinsic::R600_tex:
604 case AMDGPUIntrinsic::R600_texc:
605 case AMDGPUIntrinsic::R600_txl:
606 case AMDGPUIntrinsic::R600_txlc:
607 case AMDGPUIntrinsic::R600_txb:
608 case AMDGPUIntrinsic::R600_txbc:
609 case AMDGPUIntrinsic::R600_txf:
610 case AMDGPUIntrinsic::R600_txq:
611 case AMDGPUIntrinsic::R600_ddx:
Vincent Lejeune6df39432013-10-02 16:00:33 +0000612 case AMDGPUIntrinsic::R600_ddy:
613 case AMDGPUIntrinsic::R600_ldptr: {
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000614 unsigned TextureOp;
615 switch (IntrinsicID) {
616 case AMDGPUIntrinsic::R600_tex:
617 TextureOp = 0;
618 break;
619 case AMDGPUIntrinsic::R600_texc:
620 TextureOp = 1;
621 break;
622 case AMDGPUIntrinsic::R600_txl:
623 TextureOp = 2;
624 break;
625 case AMDGPUIntrinsic::R600_txlc:
626 TextureOp = 3;
627 break;
628 case AMDGPUIntrinsic::R600_txb:
629 TextureOp = 4;
630 break;
631 case AMDGPUIntrinsic::R600_txbc:
632 TextureOp = 5;
633 break;
634 case AMDGPUIntrinsic::R600_txf:
635 TextureOp = 6;
636 break;
637 case AMDGPUIntrinsic::R600_txq:
638 TextureOp = 7;
639 break;
640 case AMDGPUIntrinsic::R600_ddx:
641 TextureOp = 8;
642 break;
643 case AMDGPUIntrinsic::R600_ddy:
644 TextureOp = 9;
645 break;
Vincent Lejeune6df39432013-10-02 16:00:33 +0000646 case AMDGPUIntrinsic::R600_ldptr:
647 TextureOp = 10;
648 break;
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000649 default:
650 llvm_unreachable("Unknow Texture Operation");
651 }
652
653 SDValue TexArgs[19] = {
654 DAG.getConstant(TextureOp, MVT::i32),
655 Op.getOperand(1),
656 DAG.getConstant(0, MVT::i32),
657 DAG.getConstant(1, MVT::i32),
658 DAG.getConstant(2, MVT::i32),
659 DAG.getConstant(3, MVT::i32),
660 Op.getOperand(2),
661 Op.getOperand(3),
662 Op.getOperand(4),
663 DAG.getConstant(0, MVT::i32),
664 DAG.getConstant(1, MVT::i32),
665 DAG.getConstant(2, MVT::i32),
666 DAG.getConstant(3, MVT::i32),
667 Op.getOperand(5),
668 Op.getOperand(6),
669 Op.getOperand(7),
670 Op.getOperand(8),
671 Op.getOperand(9),
672 Op.getOperand(10)
673 };
674 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
675 }
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000676 case AMDGPUIntrinsic::AMDGPU_dp4: {
677 SDValue Args[8] = {
678 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
679 DAG.getConstant(0, MVT::i32)),
680 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
681 DAG.getConstant(0, MVT::i32)),
682 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
683 DAG.getConstant(1, MVT::i32)),
684 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
685 DAG.getConstant(1, MVT::i32)),
686 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
687 DAG.getConstant(2, MVT::i32)),
688 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
689 DAG.getConstant(2, MVT::i32)),
690 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
691 DAG.getConstant(3, MVT::i32)),
692 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
693 DAG.getConstant(3, MVT::i32))
694 };
695 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8);
696 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000697
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000698 case Intrinsic::r600_read_ngroups_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000699 return LowerImplicitParameter(DAG, VT, DL, 0);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000700 case Intrinsic::r600_read_ngroups_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000701 return LowerImplicitParameter(DAG, VT, DL, 1);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000702 case Intrinsic::r600_read_ngroups_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000703 return LowerImplicitParameter(DAG, VT, DL, 2);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000704 case Intrinsic::r600_read_global_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000705 return LowerImplicitParameter(DAG, VT, DL, 3);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000706 case Intrinsic::r600_read_global_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000707 return LowerImplicitParameter(DAG, VT, DL, 4);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000708 case Intrinsic::r600_read_global_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000709 return LowerImplicitParameter(DAG, VT, DL, 5);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000710 case Intrinsic::r600_read_local_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000711 return LowerImplicitParameter(DAG, VT, DL, 6);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000712 case Intrinsic::r600_read_local_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000713 return LowerImplicitParameter(DAG, VT, DL, 7);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000714 case Intrinsic::r600_read_local_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000715 return LowerImplicitParameter(DAG, VT, DL, 8);
716
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000717 case Intrinsic::r600_read_tgid_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000718 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
719 AMDGPU::T1_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000720 case Intrinsic::r600_read_tgid_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000721 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
722 AMDGPU::T1_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000723 case Intrinsic::r600_read_tgid_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000724 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
725 AMDGPU::T1_Z, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000726 case Intrinsic::r600_read_tidig_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000727 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
728 AMDGPU::T0_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000729 case Intrinsic::r600_read_tidig_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000730 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
731 AMDGPU::T0_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000732 case Intrinsic::r600_read_tidig_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000733 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
734 AMDGPU::T0_Z, VT);
735 }
736 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
737 break;
738 }
739 } // end switch(Op.getOpcode())
740 return SDValue();
741}
742
743void R600TargetLowering::ReplaceNodeResults(SDNode *N,
744 SmallVectorImpl<SDValue> &Results,
745 SelectionDAG &DAG) const {
746 switch (N->getOpcode()) {
747 default: return;
748 case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
Tom Stellard365366f2013-01-23 02:09:06 +0000749 return;
750 case ISD::LOAD: {
751 SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
752 Results.push_back(SDValue(Node, 0));
753 Results.push_back(SDValue(Node, 1));
754 // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
755 // function
756 DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
757 return;
758 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000759 case ISD::STORE:
760 SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
761 Results.push_back(SDValue(Node, 0));
762 return;
Tom Stellard75aadc22012-12-11 21:25:42 +0000763 }
764}
765
/// Custom lowering for ISD::FCOS / ISD::FSIN.
///
/// On hw >= R700, COS/SIN input must be between -1. and 1.
/// Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDValue Arg = Op.getOperand(0);
  // FractPart = FRACT(x * (1/(2*Pi)) + 0.5); 0.15915494309 == 1 / (2*Pi).
  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
      DAG.getNode(ISD::FADD, SDLoc(Op), VT,
        DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
          DAG.getConstantFP(0.15915494309, MVT::f32)),
        DAG.getConstantFP(0.5, MVT::f32)));
  // Select the hardware trig node matching the opcode being lowered.
  unsigned TrigNode;
  switch (Op.getOpcode()) {
  case ISD::FCOS:
    TrigNode = AMDGPUISD::COS_HW;
    break;
  case ISD::FSIN:
    TrigNode = AMDGPUISD::SIN_HW;
    break;
  default:
    llvm_unreachable("Wrong trig opcode");
  }
  // Feed the hardware node the normalized phase, FractPart - 0.5, which lies
  // in [-0.5, 0.5].
  SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
      DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
        DAG.getConstantFP(-0.5, MVT::f32)));
  if (Gen >= AMDGPUSubtarget::R700)
    return TrigVal;
  // On R600 hw, COS/SIN input must be between -Pi and Pi.
  // NOTE(review): this FMUL scales the *result* of SIN_HW/COS_HW, and by Pi
  // rather than 2*Pi. If the intent is to rescale the normalized phase in
  // [-0.5, 0.5] back to radians in [-Pi, Pi], the factor would be 2*Pi and
  // would apply to the input operand -- verify against the R600 ISA docs and
  // the SIN/COS selection patterns in R600Instructions.td before relying on
  // the pre-R700 path.
  return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
      DAG.getConstantFP(3.14159265359, MVT::f32));
}
796
Tom Stellard75aadc22012-12-11 21:25:42 +0000797SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
798 return DAG.getNode(
799 ISD::SETCC,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000800 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +0000801 MVT::i1,
802 Op, DAG.getConstantFP(0.0f, MVT::f32),
803 DAG.getCondCode(ISD::SETNE)
804 );
805}
806
Tom Stellard75aadc22012-12-11 21:25:42 +0000807SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000808 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +0000809 unsigned DwordOffset) const {
810 unsigned ByteOffset = DwordOffset * 4;
811 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +0000812 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +0000813
814 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
815 assert(isInt<16>(ByteOffset));
816
817 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
818 DAG.getConstant(ByteOffset, MVT::i32), // PTR
819 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
820 false, false, false, 0);
821}
822
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000823SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
824
825 MachineFunction &MF = DAG.getMachineFunction();
826 const AMDGPUFrameLowering *TFL =
827 static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
828
829 FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
830 assert(FIN);
831
832 unsigned FrameIndex = FIN->getIndex();
833 unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
834 return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
835}
836
Tom Stellard75aadc22012-12-11 21:25:42 +0000837bool R600TargetLowering::isZero(SDValue Op) const {
838 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
839 return Cst->isNullValue();
840 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
841 return CstFP->isZero();
842 } else {
843 return false;
844 }
845}
846
847SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +0000848 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000849 EVT VT = Op.getValueType();
850
851 SDValue LHS = Op.getOperand(0);
852 SDValue RHS = Op.getOperand(1);
853 SDValue True = Op.getOperand(2);
854 SDValue False = Op.getOperand(3);
855 SDValue CC = Op.getOperand(4);
856 SDValue Temp;
857
858 // LHS and RHS are guaranteed to be the same value type
859 EVT CompareVT = LHS.getValueType();
860
861 // Check if we can lower this to a native operation.
862
Tom Stellard2add82d2013-03-08 15:37:09 +0000863 // Try to lower to a SET* instruction:
864 //
865 // SET* can match the following patterns:
866 //
Tom Stellardcd428182013-09-28 02:50:38 +0000867 // select_cc f32, f32, -1, 0, cc_supported
868 // select_cc f32, f32, 1.0f, 0.0f, cc_supported
869 // select_cc i32, i32, -1, 0, cc_supported
Tom Stellard2add82d2013-03-08 15:37:09 +0000870 //
871
872 // Move hardware True/False values to the correct operand.
Tom Stellardcd428182013-09-28 02:50:38 +0000873 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
874 ISD::CondCode InverseCC =
875 ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
Tom Stellard5694d302013-09-28 02:50:43 +0000876 if (isHWTrueValue(False) && isHWFalseValue(True)) {
877 if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
878 std::swap(False, True);
879 CC = DAG.getCondCode(InverseCC);
880 } else {
881 ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
882 if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
883 std::swap(False, True);
884 std::swap(LHS, RHS);
885 CC = DAG.getCondCode(SwapInvCC);
886 }
887 }
Tom Stellard2add82d2013-03-08 15:37:09 +0000888 }
889
890 if (isHWTrueValue(True) && isHWFalseValue(False) &&
891 (CompareVT == VT || VT == MVT::i32)) {
892 // This can be matched by a SET* instruction.
893 return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
894 }
895
Tom Stellard75aadc22012-12-11 21:25:42 +0000896 // Try to lower to a CND* instruction:
Tom Stellard2add82d2013-03-08 15:37:09 +0000897 //
898 // CND* can match the following patterns:
899 //
Tom Stellardcd428182013-09-28 02:50:38 +0000900 // select_cc f32, 0.0, f32, f32, cc_supported
901 // select_cc f32, 0.0, i32, i32, cc_supported
902 // select_cc i32, 0, f32, f32, cc_supported
903 // select_cc i32, 0, i32, i32, cc_supported
Tom Stellard2add82d2013-03-08 15:37:09 +0000904 //
Tom Stellardcd428182013-09-28 02:50:38 +0000905
906 // Try to move the zero value to the RHS
907 if (isZero(LHS)) {
908 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
909 // Try swapping the operands
910 ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
911 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
912 std::swap(LHS, RHS);
913 CC = DAG.getCondCode(CCSwapped);
914 } else {
915 // Try inverting the conditon and then swapping the operands
916 ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
917 CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
918 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
919 std::swap(True, False);
920 std::swap(LHS, RHS);
921 CC = DAG.getCondCode(CCSwapped);
922 }
923 }
924 }
925 if (isZero(RHS)) {
926 SDValue Cond = LHS;
927 SDValue Zero = RHS;
Tom Stellard75aadc22012-12-11 21:25:42 +0000928 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
929 if (CompareVT != VT) {
930 // Bitcast True / False to the correct types. This will end up being
931 // a nop, but it allows us to define only a single pattern in the
932 // .TD files for each CND* instruction rather than having to have
933 // one pattern for integer True/False and one for fp True/False
934 True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
935 False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
936 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000937
938 switch (CCOpcode) {
939 case ISD::SETONE:
940 case ISD::SETUNE:
941 case ISD::SETNE:
Tom Stellard75aadc22012-12-11 21:25:42 +0000942 CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
943 Temp = True;
944 True = False;
945 False = Temp;
946 break;
947 default:
948 break;
949 }
950 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
951 Cond, Zero,
952 True, False,
953 DAG.getCondCode(CCOpcode));
954 return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
955 }
956
Tom Stellard75aadc22012-12-11 21:25:42 +0000957
958 // Possible Min/Max pattern
959 SDValue MinMax = LowerMinMax(Op, DAG);
960 if (MinMax.getNode()) {
961 return MinMax;
962 }
963
964 // If we make it this for it means we have no native instructions to handle
965 // this SELECT_CC, so we must lower it.
966 SDValue HWTrue, HWFalse;
967
968 if (CompareVT == MVT::f32) {
969 HWTrue = DAG.getConstantFP(1.0f, CompareVT);
970 HWFalse = DAG.getConstantFP(0.0f, CompareVT);
971 } else if (CompareVT == MVT::i32) {
972 HWTrue = DAG.getConstant(-1, CompareVT);
973 HWFalse = DAG.getConstant(0, CompareVT);
974 }
975 else {
976 assert(!"Unhandled value type in LowerSELECT_CC");
977 }
978
979 // Lower this unsupported SELECT_CC into a combination of two supported
980 // SELECT_CC operations.
981 SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
982
983 return DAG.getNode(ISD::SELECT_CC, DL, VT,
984 Cond, HWFalse,
985 True, False,
986 DAG.getCondCode(ISD::SETNE));
987}
988
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000989/// LLVM generates byte-addresed pointers. For indirect addressing, we need to
990/// convert these pointers to a register index. Each register holds
991/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
992/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
993/// for indirect addressing.
994SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
995 unsigned StackWidth,
996 SelectionDAG &DAG) const {
997 unsigned SRLPad;
998 switch(StackWidth) {
999 case 1:
1000 SRLPad = 2;
1001 break;
1002 case 2:
1003 SRLPad = 3;
1004 break;
1005 case 4:
1006 SRLPad = 4;
1007 break;
1008 default: llvm_unreachable("Invalid stack width");
1009 }
1010
Andrew Trickef9de2a2013-05-25 02:42:55 +00001011 return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001012 DAG.getConstant(SRLPad, MVT::i32));
1013}
1014
1015void R600TargetLowering::getStackAddress(unsigned StackWidth,
1016 unsigned ElemIdx,
1017 unsigned &Channel,
1018 unsigned &PtrIncr) const {
1019 switch (StackWidth) {
1020 default:
1021 case 1:
1022 Channel = 0;
1023 if (ElemIdx > 0) {
1024 PtrIncr = 1;
1025 } else {
1026 PtrIncr = 0;
1027 }
1028 break;
1029 case 2:
1030 Channel = ElemIdx % 2;
1031 if (ElemIdx == 2) {
1032 PtrIncr = 1;
1033 } else {
1034 PtrIncr = 0;
1035 }
1036 break;
1037 case 4:
1038 Channel = ElemIdx;
1039 PtrIncr = 0;
1040 break;
1041 }
1042}
1043
Tom Stellard75aadc22012-12-11 21:25:42 +00001044SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001045 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001046 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1047 SDValue Chain = Op.getOperand(0);
1048 SDValue Value = Op.getOperand(1);
1049 SDValue Ptr = Op.getOperand(2);
1050
Tom Stellard2ffc3302013-08-26 15:05:44 +00001051 SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
Tom Stellardfbab8272013-08-16 01:12:11 +00001052 if (Result.getNode()) {
1053 return Result;
1054 }
1055
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001056 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
1057 if (StoreNode->isTruncatingStore()) {
1058 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001059 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001060 EVT MemVT = StoreNode->getMemoryVT();
1061 SDValue MaskConstant;
1062 if (MemVT == MVT::i8) {
1063 MaskConstant = DAG.getConstant(0xFF, MVT::i32);
1064 } else {
1065 assert(MemVT == MVT::i16);
1066 MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
1067 }
1068 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
1069 DAG.getConstant(2, MVT::i32));
1070 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
1071 DAG.getConstant(0x00000003, VT));
1072 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1073 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
1074 DAG.getConstant(3, VT));
1075 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1076 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
1077 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1078 // vector instead.
1079 SDValue Src[4] = {
1080 ShiftedValue,
1081 DAG.getConstant(0, MVT::i32),
1082 DAG.getConstant(0, MVT::i32),
1083 Mask
1084 };
1085 SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src, 4);
1086 SDValue Args[3] = { Chain, Input, DWordAddr };
1087 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
1088 Op->getVTList(), Args, 3, MemVT,
1089 StoreNode->getMemOperand());
1090 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
1091 Value.getValueType().bitsGE(MVT::i32)) {
1092 // Convert pointer from byte address to dword address.
1093 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1094 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
1095 Ptr, DAG.getConstant(2, MVT::i32)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001096
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001097 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
1098 assert(!"Truncated and indexed stores not supported yet");
1099 } else {
1100 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1101 }
1102 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001103 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001104 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001105
1106 EVT ValueVT = Value.getValueType();
1107
1108 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1109 return SDValue();
1110 }
1111
1112 // Lowering for indirect addressing
1113
1114 const MachineFunction &MF = DAG.getMachineFunction();
1115 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1116 getTargetMachine().getFrameLowering());
1117 unsigned StackWidth = TFL->getStackWidth(MF);
1118
1119 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1120
1121 if (ValueVT.isVector()) {
1122 unsigned NumElemVT = ValueVT.getVectorNumElements();
1123 EVT ElemVT = ValueVT.getVectorElementType();
1124 SDValue Stores[4];
1125
1126 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1127 "vector width in load");
1128
1129 for (unsigned i = 0; i < NumElemVT; ++i) {
1130 unsigned Channel, PtrIncr;
1131 getStackAddress(StackWidth, i, Channel, PtrIncr);
1132 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1133 DAG.getConstant(PtrIncr, MVT::i32));
1134 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
1135 Value, DAG.getConstant(i, MVT::i32));
1136
1137 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1138 Chain, Elem, Ptr,
1139 DAG.getTargetConstant(Channel, MVT::i32));
1140 }
1141 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
1142 } else {
1143 if (ValueVT == MVT::i8) {
1144 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1145 }
1146 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
NAKAMURA Takumi18ca09c2013-05-22 06:37:25 +00001147 DAG.getTargetConstant(0, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001148 }
1149
1150 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001151}
1152
Tom Stellard365366f2013-01-23 02:09:06 +00001153// return (512 + (kc_bank << 12)
1154static int
1155ConstantAddressBlock(unsigned AddressSpace) {
1156 switch (AddressSpace) {
1157 case AMDGPUAS::CONSTANT_BUFFER_0:
1158 return 512;
1159 case AMDGPUAS::CONSTANT_BUFFER_1:
1160 return 512 + 4096;
1161 case AMDGPUAS::CONSTANT_BUFFER_2:
1162 return 512 + 4096 * 2;
1163 case AMDGPUAS::CONSTANT_BUFFER_3:
1164 return 512 + 4096 * 3;
1165 case AMDGPUAS::CONSTANT_BUFFER_4:
1166 return 512 + 4096 * 4;
1167 case AMDGPUAS::CONSTANT_BUFFER_5:
1168 return 512 + 4096 * 5;
1169 case AMDGPUAS::CONSTANT_BUFFER_6:
1170 return 512 + 4096 * 6;
1171 case AMDGPUAS::CONSTANT_BUFFER_7:
1172 return 512 + 4096 * 7;
1173 case AMDGPUAS::CONSTANT_BUFFER_8:
1174 return 512 + 4096 * 8;
1175 case AMDGPUAS::CONSTANT_BUFFER_9:
1176 return 512 + 4096 * 9;
1177 case AMDGPUAS::CONSTANT_BUFFER_10:
1178 return 512 + 4096 * 10;
1179 case AMDGPUAS::CONSTANT_BUFFER_11:
1180 return 512 + 4096 * 11;
1181 case AMDGPUAS::CONSTANT_BUFFER_12:
1182 return 512 + 4096 * 12;
1183 case AMDGPUAS::CONSTANT_BUFFER_13:
1184 return 512 + 4096 * 13;
1185 case AMDGPUAS::CONSTANT_BUFFER_14:
1186 return 512 + 4096 * 14;
1187 case AMDGPUAS::CONSTANT_BUFFER_15:
1188 return 512 + 4096 * 15;
1189 default:
1190 return -1;
1191 }
1192}
1193
/// Custom lowering for ISD::LOAD.
///
/// Vector loads from local memory are split; loads from the constant buffers
/// become CONST_ADDRESS (KCache) accesses; SEXT loads outside constant
/// buffer 0 are expanded to ext-load + shl + sra; private-address loads are
/// lowered to indirect REGISTER_LOAD ops.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  // Vector loads from local memory: split into smaller loads.
  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      SplitVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 && LoadNode->getExtensionType() != ISD::SEXTLOAD) {
    SDValue Result;
    // A statically-known pointer can be folded into per-channel KCache
    // addresses; otherwise the address must be computed at runtime.
    if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
        dyn_cast<Constant>(LoadNode->getSrcValue()) ||
        dyn_cast<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT, Slots, NumElements);
    } else {
      // A non-constant pointer can't be folded; keep it as a v4i32
      // CONST_ADDRESS load with a runtime dword address and buffer id.
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    // Scalar loads use only the first channel of the fetched vector.
    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
          DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so
  // we need to manually expand loads that may be legal in some address spaces
  // and illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported
  // for compute shaders, since the data is sign extended when it is uploaded
  // to the buffer. However SEXT loads from other address spaces are not
  // supported, so we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    // Sign-extend by shifting the loaded value up to the top bits and
    // arithmetic-shifting it back down.
    SDValue ShiftAmount =
        DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                     LoadNode->getPointerInfo(), MemVT,
                                     LoadNode->isVolatile(),
                                     LoadNode->isNonTemporal(),
                                     LoadNode->getAlignment());
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);

    SDValue MergedValues[2] = { Sra, Chain };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing into private (stack) memory.
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    // Load each element from its channel/slot (see getStackAddress) and pad
    // the remaining channels with undef before rebuilding the vector.
    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, MVT::i32),
                             Op.getOperand(2));
    }
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2];
  Ops[0] = LoweredLoad;
  Ops[1] = Chain;

  return DAG.getMergeValues(Ops, 2, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001328
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
///
/// Kernel arguments live in constant buffer 0; each one is materialized as a
/// sign-extending load from its assigned offset past the 36-byte implicit
/// parameter area.
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      SDLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());

  SmallVector<ISD::InputArg, 8> LocalIns;

  // Recover the pre-legalization argument types so each argument is loaded
  // with its original memory type (MemVT below).
  getOriginalFunctionArgs(DAG, DAG.getMachineFunction().getFunction(), Ins,
                          LocalIns);

  AnalyzeFormalArguments(CCInfo, LocalIns);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    EVT VT = Ins[i].VT;
    EVT MemVT = LocalIns[i].VT;

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.
    SDValue Arg = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, Chain,
                           DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
                           MachinePointerInfo(UndefValue::get(PtrTy)),
                           MemVT, false, false, 4);
                           // 4 is the preferred alignment for
                           // the CONSTANT memory space.
    InVals.push_back(Arg);
  }
  return Chain;
}
1370
Matt Arsenault758659232013-05-18 00:21:46 +00001371EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Tom Stellard75aadc22012-12-11 21:25:42 +00001372 if (!VT.isVector()) return MVT::i32;
1373 return VT.changeVectorElementTypeToInteger();
1374}
1375
/// Fold "trivial" channels of a 4-element BUILD_VECTOR into swizzle
/// selectors: undef lanes become SEL_MASK_WRITE, constant 0.0 / 1.0 lanes
/// become SEL_0 / SEL_1, and a lane that duplicates an earlier one is
/// redirected to that earlier lane.  Folded lanes are replaced by undef in
/// the returned BUILD_VECTOR; \p RemapSwizzle receives the
/// old-lane -> new-selector mapping (lanes left untouched get no entry).
static SDValue
CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
                        DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      // We mask write here to teach later passes that the ith element of this
      // vector is undef. Thus we can use it to reduce 128 bits reg usage,
      // break false dependencies and additionally make assembly easier to read.
      RemapSwizzle[i] = 7; // SEL_MASK_WRITE
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    // Lanes that are (now) undef already carry their selector in
    // RemapSwizzle and must not enter the duplicate search below.
    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      continue;
    // If this lane repeats an earlier lane, point its selector at the first
    // occurrence and drop this copy.
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
      VectorEntry.getValueType(), NewBldVec, 4);
}
1418
/// Reorder the lanes of a 4-element BUILD_VECTOR of EXTRACT_VECTOR_ELT
/// nodes so that, where possible, lane i holds the element extracted from
/// source index i.  \p RemapSwizzle receives the resulting
/// old-lane -> new-lane mapping (initialized to the identity).
static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
                                DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };
  bool isUnmovable[4] = { false, false, false, false };
  // Start from the identity mapping.
  for (unsigned i = 0; i < 4; i++)
    RemapSwizzle[i] = i;

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
          ->getZExtValue();
      // Element already sits in its home lane: pin it there.
      if (i == Idx) {
        isUnmovable[Idx] = true;
        continue;
      }
      // Home lane already pinned; leave this element where it is.
      if (isUnmovable[Idx])
        continue;
      // Swap i and Idx
      std::swap(NewBldVec[Idx], NewBldVec[i]);
      std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
      // NOTE(review): only the first movable mismatch is swapped per call
      // (the loop exits here), so at most one pair is reordered --
      // presumably callers tolerate a partial reorder; confirm before
      // relying on a full reorganization.
      break;
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
      VectorEntry.getValueType(), NewBldVec, 4);
}
1453
1454
1455SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1456SDValue Swz[4], SelectionDAG &DAG) const {
1457 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1458 // Old -> New swizzle values
1459 DenseMap<unsigned, unsigned> SwizzleRemap;
1460
1461 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1462 for (unsigned i = 0; i < 4; i++) {
1463 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1464 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1465 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1466 }
1467
1468 SwizzleRemap.clear();
1469 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1470 for (unsigned i = 0; i < 4; i++) {
1471 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1472 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1473 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1474 }
1475
1476 return BuildVector;
1477}
1478
1479
Tom Stellard75aadc22012-12-11 21:25:42 +00001480//===----------------------------------------------------------------------===//
1481// Custom DAG Optimizations
1482//===----------------------------------------------------------------------===//
1483
1484SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1485 DAGCombinerInfo &DCI) const {
1486 SelectionDAG &DAG = DCI.DAG;
1487
1488 switch (N->getOpcode()) {
1489 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1490 case ISD::FP_ROUND: {
1491 SDValue Arg = N->getOperand(0);
1492 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001493 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001494 Arg.getOperand(0));
1495 }
1496 break;
1497 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001498
1499 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1500 // (i32 select_cc f32, f32, -1, 0 cc)
1501 //
1502 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1503 // this to one of the SET*_DX10 instructions.
1504 case ISD::FP_TO_SINT: {
1505 SDValue FNeg = N->getOperand(0);
1506 if (FNeg.getOpcode() != ISD::FNEG) {
1507 return SDValue();
1508 }
1509 SDValue SelectCC = FNeg.getOperand(0);
1510 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1511 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1512 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1513 !isHWTrueValue(SelectCC.getOperand(2)) ||
1514 !isHWFalseValue(SelectCC.getOperand(3))) {
1515 return SDValue();
1516 }
1517
Andrew Trickef9de2a2013-05-25 02:42:55 +00001518 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001519 SelectCC.getOperand(0), // LHS
1520 SelectCC.getOperand(1), // RHS
1521 DAG.getConstant(-1, MVT::i32), // True
1522 DAG.getConstant(0, MVT::i32), // Flase
1523 SelectCC.getOperand(4)); // CC
1524
1525 break;
1526 }
Quentin Colombete2e05482013-07-30 00:27:16 +00001527
NAKAMURA Takumi8a046432013-10-28 04:07:38 +00001528 // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1529 // => build_vector elt0, ... , NewEltIdx, ... , eltN
Quentin Colombete2e05482013-07-30 00:27:16 +00001530 case ISD::INSERT_VECTOR_ELT: {
1531 SDValue InVec = N->getOperand(0);
1532 SDValue InVal = N->getOperand(1);
1533 SDValue EltNo = N->getOperand(2);
1534 SDLoc dl(N);
1535
1536 // If the inserted element is an UNDEF, just use the input vector.
1537 if (InVal.getOpcode() == ISD::UNDEF)
1538 return InVec;
1539
1540 EVT VT = InVec.getValueType();
1541
1542 // If we can't generate a legal BUILD_VECTOR, exit
1543 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1544 return SDValue();
1545
1546 // Check that we know which element is being inserted
1547 if (!isa<ConstantSDNode>(EltNo))
1548 return SDValue();
1549 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1550
1551 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1552 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1553 // vector elements.
1554 SmallVector<SDValue, 8> Ops;
1555 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1556 Ops.append(InVec.getNode()->op_begin(),
1557 InVec.getNode()->op_end());
1558 } else if (InVec.getOpcode() == ISD::UNDEF) {
1559 unsigned NElts = VT.getVectorNumElements();
1560 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1561 } else {
1562 return SDValue();
1563 }
1564
1565 // Insert the element
1566 if (Elt < Ops.size()) {
1567 // All the operands of BUILD_VECTOR must have the same type;
1568 // we enforce that here.
1569 EVT OpVT = Ops[0].getValueType();
1570 if (InVal.getValueType() != OpVT)
1571 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1572 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
1573 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
1574 Ops[Elt] = InVal;
1575 }
1576
1577 // Return the new vector
1578 return DAG.getNode(ISD::BUILD_VECTOR, dl,
1579 VT, &Ops[0], Ops.size());
1580 }
1581
Tom Stellard365366f2013-01-23 02:09:06 +00001582 // Extract_vec (Build_vector) generated by custom lowering
1583 // also needs to be customly combined
1584 case ISD::EXTRACT_VECTOR_ELT: {
1585 SDValue Arg = N->getOperand(0);
1586 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1587 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1588 unsigned Element = Const->getZExtValue();
1589 return Arg->getOperand(Element);
1590 }
1591 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001592 if (Arg.getOpcode() == ISD::BITCAST &&
1593 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1594 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1595 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001596 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00001597 Arg->getOperand(0).getOperand(Element));
1598 }
1599 }
Tom Stellard365366f2013-01-23 02:09:06 +00001600 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001601
1602 case ISD::SELECT_CC: {
1603 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1604 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001605 //
1606 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1607 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001608 SDValue LHS = N->getOperand(0);
1609 if (LHS.getOpcode() != ISD::SELECT_CC) {
1610 return SDValue();
1611 }
1612
1613 SDValue RHS = N->getOperand(1);
1614 SDValue True = N->getOperand(2);
1615 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001616 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001617
1618 if (LHS.getOperand(2).getNode() != True.getNode() ||
1619 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00001620 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00001621 return SDValue();
1622 }
1623
Tom Stellard5e524892013-03-08 15:37:11 +00001624 switch (NCC) {
1625 default: return SDValue();
1626 case ISD::SETNE: return LHS;
1627 case ISD::SETEQ: {
1628 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1629 LHSCC = ISD::getSetCCInverse(LHSCC,
1630 LHS.getOperand(0).getValueType().isInteger());
Tom Stellardcd428182013-09-28 02:50:38 +00001631 if (DCI.isBeforeLegalizeOps() ||
1632 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1633 return DAG.getSelectCC(SDLoc(N),
1634 LHS.getOperand(0),
1635 LHS.getOperand(1),
1636 LHS.getOperand(2),
1637 LHS.getOperand(3),
1638 LHSCC);
1639 break;
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001640 }
Tom Stellard5e524892013-03-08 15:37:11 +00001641 }
Tom Stellardcd428182013-09-28 02:50:38 +00001642 return SDValue();
Tom Stellard5e524892013-03-08 15:37:11 +00001643 }
Tom Stellardfbab8272013-08-16 01:12:11 +00001644
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001645 case AMDGPUISD::EXPORT: {
1646 SDValue Arg = N->getOperand(1);
1647 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1648 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001649
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001650 SDValue NewArgs[8] = {
1651 N->getOperand(0), // Chain
1652 SDValue(),
1653 N->getOperand(2), // ArrayBase
1654 N->getOperand(3), // Type
1655 N->getOperand(4), // SWZ_X
1656 N->getOperand(5), // SWZ_Y
1657 N->getOperand(6), // SWZ_Z
1658 N->getOperand(7) // SWZ_W
1659 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00001660 SDLoc DL(N);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001661 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001662 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
Tom Stellarde06163a2013-02-07 14:02:35 +00001663 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001664 case AMDGPUISD::TEXTURE_FETCH: {
1665 SDValue Arg = N->getOperand(1);
1666 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1667 break;
1668
1669 SDValue NewArgs[19] = {
1670 N->getOperand(0),
1671 N->getOperand(1),
1672 N->getOperand(2),
1673 N->getOperand(3),
1674 N->getOperand(4),
1675 N->getOperand(5),
1676 N->getOperand(6),
1677 N->getOperand(7),
1678 N->getOperand(8),
1679 N->getOperand(9),
1680 N->getOperand(10),
1681 N->getOperand(11),
1682 N->getOperand(12),
1683 N->getOperand(13),
1684 N->getOperand(14),
1685 N->getOperand(15),
1686 N->getOperand(16),
1687 N->getOperand(17),
1688 N->getOperand(18),
1689 };
1690 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
1691 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
1692 NewArgs, 19);
1693 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001694 }
1695 return SDValue();
1696}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001697
1698static bool
1699FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001700 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001701 const R600InstrInfo *TII =
1702 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
1703 if (!Src.isMachineOpcode())
1704 return false;
1705 switch (Src.getMachineOpcode()) {
1706 case AMDGPU::FNEG_R600:
1707 if (!Neg.getNode())
1708 return false;
1709 Src = Src.getOperand(0);
1710 Neg = DAG.getTargetConstant(1, MVT::i32);
1711 return true;
1712 case AMDGPU::FABS_R600:
1713 if (!Abs.getNode())
1714 return false;
1715 Src = Src.getOperand(0);
1716 Abs = DAG.getTargetConstant(1, MVT::i32);
1717 return true;
1718 case AMDGPU::CONST_COPY: {
1719 unsigned Opcode = ParentNode->getMachineOpcode();
1720 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1721
1722 if (!Sel.getNode())
1723 return false;
1724
1725 SDValue CstOffset = Src.getOperand(0);
1726 if (ParentNode->getValueType(0).isVector())
1727 return false;
1728
1729 // Gather constants values
1730 int SrcIndices[] = {
1731 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
1732 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
1733 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
1734 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1735 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1736 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1737 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
1738 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
1739 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
1740 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
1741 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
1742 };
1743 std::vector<unsigned> Consts;
1744 for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
1745 int OtherSrcIdx = SrcIndices[i];
1746 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
1747 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
1748 continue;
1749 if (HasDst) {
1750 OtherSrcIdx--;
1751 OtherSelIdx--;
1752 }
1753 if (RegisterSDNode *Reg =
1754 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
1755 if (Reg->getReg() == AMDGPU::ALU_CONST) {
1756 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(
1757 ParentNode->getOperand(OtherSelIdx));
1758 Consts.push_back(Cst->getZExtValue());
1759 }
1760 }
1761 }
1762
1763 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
1764 Consts.push_back(Cst->getZExtValue());
1765 if (!TII->fitsConstReadLimitations(Consts)) {
1766 return false;
1767 }
1768
1769 Sel = CstOffset;
1770 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
1771 return true;
1772 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001773 case AMDGPU::MOV_IMM_I32:
1774 case AMDGPU::MOV_IMM_F32: {
1775 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
1776 uint64_t ImmValue = 0;
1777
1778
1779 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
1780 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
1781 float FloatValue = FPC->getValueAPF().convertToFloat();
1782 if (FloatValue == 0.0) {
1783 ImmReg = AMDGPU::ZERO;
1784 } else if (FloatValue == 0.5) {
1785 ImmReg = AMDGPU::HALF;
1786 } else if (FloatValue == 1.0) {
1787 ImmReg = AMDGPU::ONE;
1788 } else {
1789 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
1790 }
1791 } else {
1792 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
1793 uint64_t Value = C->getZExtValue();
1794 if (Value == 0) {
1795 ImmReg = AMDGPU::ZERO;
1796 } else if (Value == 1) {
1797 ImmReg = AMDGPU::ONE_INT;
1798 } else {
1799 ImmValue = Value;
1800 }
1801 }
1802
1803 // Check that we aren't already using an immediate.
1804 // XXX: It's possible for an instruction to have more than one
1805 // immediate operand, but this is not supported yet.
1806 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
1807 if (!Imm.getNode())
1808 return false;
1809 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
1810 assert(C);
1811 if (C->getZExtValue())
1812 return false;
1813 Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
1814 }
1815 Src = DAG.getRegister(ImmReg, MVT::i32);
1816 return true;
1817 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001818 default:
1819 return false;
1820 }
1821}
1822
1823
1824/// \brief Fold the instructions after selecting them
1825SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
1826 SelectionDAG &DAG) const {
1827 const R600InstrInfo *TII =
1828 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
1829 if (!Node->isMachineOpcode())
1830 return Node;
1831 unsigned Opcode = Node->getMachineOpcode();
1832 SDValue FakeOp;
1833
1834 std::vector<SDValue> Ops;
1835 for(SDNode::op_iterator I = Node->op_begin(), E = Node->op_end();
1836 I != E; ++I)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00001837 Ops.push_back(*I);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001838
1839 if (Opcode == AMDGPU::DOT_4) {
1840 int OperandIdx[] = {
1841 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1842 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1843 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1844 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
1845 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
1846 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
1847 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
1848 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00001849 };
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001850 int NegIdx[] = {
1851 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
1852 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
1853 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
1854 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
1855 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
1856 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
1857 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
1858 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
1859 };
1860 int AbsIdx[] = {
1861 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
1862 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
1863 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
1864 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
1865 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
1866 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
1867 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
1868 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
1869 };
1870 for (unsigned i = 0; i < 8; i++) {
1871 if (OperandIdx[i] < 0)
1872 return Node;
1873 SDValue &Src = Ops[OperandIdx[i] - 1];
1874 SDValue &Neg = Ops[NegIdx[i] - 1];
1875 SDValue &Abs = Ops[AbsIdx[i] - 1];
1876 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1877 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
1878 if (HasDst)
1879 SelIdx--;
1880 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001881 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
1882 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1883 }
1884 } else if (Opcode == AMDGPU::REG_SEQUENCE) {
1885 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
1886 SDValue &Src = Ops[i];
1887 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001888 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1889 }
Vincent Lejeune0167a312013-09-12 23:45:00 +00001890 } else if (Opcode == AMDGPU::CLAMP_R600) {
1891 SDValue Src = Node->getOperand(0);
1892 if (!Src.isMachineOpcode() ||
1893 !TII->hasInstrModifiers(Src.getMachineOpcode()))
1894 return Node;
1895 int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
1896 AMDGPU::OpName::clamp);
1897 if (ClampIdx < 0)
1898 return Node;
1899 std::vector<SDValue> Ops;
1900 unsigned NumOp = Src.getNumOperands();
1901 for(unsigned i = 0; i < NumOp; ++i)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00001902 Ops.push_back(Src.getOperand(i));
Vincent Lejeune0167a312013-09-12 23:45:00 +00001903 Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
1904 return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
1905 Node->getVTList(), Ops);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001906 } else {
1907 if (!TII->hasInstrModifiers(Opcode))
1908 return Node;
1909 int OperandIdx[] = {
1910 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
1911 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
1912 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
1913 };
1914 int NegIdx[] = {
1915 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
1916 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
1917 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
1918 };
1919 int AbsIdx[] = {
1920 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
1921 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
1922 -1
1923 };
1924 for (unsigned i = 0; i < 3; i++) {
1925 if (OperandIdx[i] < 0)
1926 return Node;
1927 SDValue &Src = Ops[OperandIdx[i] - 1];
1928 SDValue &Neg = Ops[NegIdx[i] - 1];
1929 SDValue FakeAbs;
1930 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
1931 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1932 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001933 int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
1934 if (HasDst) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001935 SelIdx--;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001936 ImmIdx--;
1937 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001938 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001939 SDValue &Imm = Ops[ImmIdx];
1940 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001941 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1942 }
1943 }
1944
1945 return Node;
1946}