blob: 336a2991d10482c9433c13d8a64d00b8913692e9 [file] [log] [blame]
Tom Stellard75aadc22012-12-11 21:25:42 +00001//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Custom DAG lowering for R600
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
16#include "R600Defines.h"
17#include "R600InstrInfo.h"
18#include "R600MachineFunctionInfo.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000019#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000020#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000021#include "llvm/CodeGen/MachineInstrBuilder.h"
22#include "llvm/CodeGen/MachineRegisterInfo.h"
23#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000024#include "llvm/IR/Argument.h"
25#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000026
27using namespace llvm;
28
29R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000030 AMDGPUTargetLowering(TM),
31 Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000032 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
33 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
34 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
35 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000036 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
37 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
38
Tom Stellard75aadc22012-12-11 21:25:42 +000039 computeRegisterProperties();
40
Tom Stellard0351ea22013-09-28 02:50:50 +000041 // Set condition code actions
42 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
43 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000044 setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000045 setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000046 setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
47 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000048 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
49 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
50 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
51 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000052 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
53 setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
54
55 setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
56 setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
57 setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
58 setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
59
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000060 setOperationAction(ISD::FCOS, MVT::f32, Custom);
61 setOperationAction(ISD::FSIN, MVT::f32, Custom);
62
Tom Stellard75aadc22012-12-11 21:25:42 +000063 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000064 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000065
Tom Stellard492ebea2013-03-08 15:37:07 +000066 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
67 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000068
69 setOperationAction(ISD::FSUB, MVT::f32, Expand);
70
71 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
72 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
73 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000074
Tom Stellard75aadc22012-12-11 21:25:42 +000075 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
76 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
77
Tom Stellarde8f9f282013-03-08 15:37:05 +000078 setOperationAction(ISD::SETCC, MVT::i32, Expand);
79 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000080 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
81
Tom Stellard53f2f902013-09-05 18:38:03 +000082 setOperationAction(ISD::SELECT, MVT::i32, Expand);
83 setOperationAction(ISD::SELECT, MVT::f32, Expand);
84 setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
85 setOperationAction(ISD::SELECT, MVT::v2f32, Expand);
86 setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
87 setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000088
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000089 // Legalize loads and stores to the private address space.
90 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +000091 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000092 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellard1e803092013-07-23 01:48:18 +000093 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
94 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
95 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
96 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000097 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000098 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +000099 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000100 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
Tom Stellardd3ee8c12013-08-16 01:12:06 +0000101 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
102 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000103
Tom Stellard365366f2013-01-23 02:09:06 +0000104 setOperationAction(ISD::LOAD, MVT::i32, Custom);
105 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000106 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
107
Tom Stellard75aadc22012-12-11 21:25:42 +0000108 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +0000109 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +0000110 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +0000111 setTargetDAGCombine(ISD::SELECT_CC);
Quentin Colombete2e05482013-07-30 00:27:16 +0000112 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000113
Michel Danzer49812b52013-07-10 16:37:07 +0000114 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
115
Tom Stellardb852af52013-03-08 15:37:03 +0000116 setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard87047f62013-04-24 23:56:18 +0000117 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
Tom Stellardfc455472013-08-12 22:33:21 +0000118 setSchedulingPreference(Sched::Source);
Tom Stellard75aadc22012-12-11 21:25:42 +0000119}
120
/// Expand pseudo instructions that were marked usesCustomInserter into real
/// R600 machine instructions, inserting them at \p MI's position in \p BB.
/// Unless a case returns early, the original pseudo \p MI is erased at the
/// end and the (possibly unchanged) block is returned.
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
          static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // LDS instructions that define a dst operand: the hardware delivers the
    // result in OQAP.  If the result register is actually used, re-emit the
    // LDS op defining OQAP and MOV OQAP into the original destination;
    // otherwise switch to the no-return form of the LDS op.
    if (TII->isLDSInstr(MI->getOpcode()) &&
        TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst) != -1) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg())) {
        NewMI = BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()),
                        AMDGPU::OQAP);
        TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV,
                                     MI->getOperand(0).getReg(),
                                     AMDGPU::OQAP);
      } else {
        NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                        TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      }
      // Copy the remaining (non-dst) operands onto the rewritten instruction.
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      // Not an LDS pseudo; defer to the generic AMDGPU inserter.
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  case AMDGPU::CLAMP_R600: {
    // CLAMP becomes a MOV with the clamp flag set on the destination.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                   AMDGPU::MOV,
                                                   MI->getOperand(0).getReg(),
                                                   MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    // FABS becomes a MOV with the source-absolute-value flag.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    // FNEG becomes a MOV with the source-negate flag.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    // Set the write-mask flag on the instruction that defines the masked
    // register; MASK_WRITE itself emits no code.
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    // Materialize a float immediate via its raw bit pattern.
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    // Copy from the constant file: MOV from ALU_CONST with the constant
    // selector encoded in src0_sel.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
        MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    // Mark this RAT write as end-of-program if it immediately precedes RETURN.
    unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    // Texture sample with explicit derivatives: load the H and V gradients
    // into two temporary 128-bit registers, then issue TEX_SAMPLE_G which
    // implicitly consumes them.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    // Adjust the source swizzle and coordinate-type bits per texture target.
    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    // Same as TXD above, but issues the shadow-compare sample TEX_SAMPLE_C_G.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
      // Unconditional branch lowers to a plain JUMP.
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
              .addOperand(MI->getOperand(0));
      break;

  case AMDGPU::BRANCH_COND_f32: {
    // Float conditional branch: PRED_X sets PREDICATE_BIT when the condition
    // is non-zero, then JUMP_COND consumes (and kills) the predicate.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    // Integer variant of the conditional branch expansion above.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO_INT)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if its not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = llvm::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // CF instruction encoding differs between Evergreen (84) and R600 (40).
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  // The pseudo has been expanded; remove it.
  MI->eraseFromParent();
  return BB;
}
501
502//===----------------------------------------------------------------------===//
503// Custom DAG Lowering Operations
504//===----------------------------------------------------------------------===//
505
Tom Stellard75aadc22012-12-11 21:25:42 +0000506SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000507 MachineFunction &MF = DAG.getMachineFunction();
508 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard75aadc22012-12-11 21:25:42 +0000509 switch (Op.getOpcode()) {
510 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000511 case ISD::FCOS:
512 case ISD::FSIN: return LowerTrig(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000513 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000514 case ISD::STORE: return LowerSTORE(Op, DAG);
Tom Stellard365366f2013-01-23 02:09:06 +0000515 case ISD::LOAD: return LowerLOAD(Op, DAG);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000516 case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000517 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000518 case ISD::INTRINSIC_VOID: {
519 SDValue Chain = Op.getOperand(0);
520 unsigned IntrinsicID =
521 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
522 switch (IntrinsicID) {
523 case AMDGPUIntrinsic::AMDGPU_store_output: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000524 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
525 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000526 MFI->LiveOuts.push_back(Reg);
Andrew Trickef9de2a2013-05-25 02:42:55 +0000527 return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
Tom Stellard75aadc22012-12-11 21:25:42 +0000528 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000529 case AMDGPUIntrinsic::R600_store_swizzle: {
530 const SDValue Args[8] = {
531 Chain,
532 Op.getOperand(2), // Export Value
533 Op.getOperand(3), // ArrayBase
534 Op.getOperand(4), // Type
535 DAG.getConstant(0, MVT::i32), // SWZ_X
536 DAG.getConstant(1, MVT::i32), // SWZ_Y
537 DAG.getConstant(2, MVT::i32), // SWZ_Z
538 DAG.getConstant(3, MVT::i32) // SWZ_W
539 };
Andrew Trickef9de2a2013-05-25 02:42:55 +0000540 return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(),
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000541 Args, 8);
Tom Stellard75aadc22012-12-11 21:25:42 +0000542 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000543
Tom Stellard75aadc22012-12-11 21:25:42 +0000544 // default for switch(IntrinsicID)
545 default: break;
546 }
547 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
548 break;
549 }
550 case ISD::INTRINSIC_WO_CHAIN: {
551 unsigned IntrinsicID =
552 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
553 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000554 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000555 switch(IntrinsicID) {
556 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeunef143af32013-11-11 22:10:24 +0000557 case AMDGPUIntrinsic::R600_interp_xy:
558 case AMDGPUIntrinsic::R600_interp_zw: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000559 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Tom Stellard41afe6a2013-02-05 17:09:14 +0000560 MachineSDNode *interp;
Vincent Lejeunef143af32013-11-11 22:10:24 +0000561 SDValue RegisterINode = Op.getOperand(2);
562 SDValue RegisterJNode = Op.getOperand(3);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000563
Vincent Lejeunef143af32013-11-11 22:10:24 +0000564 if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
Tom Stellard41afe6a2013-02-05 17:09:14 +0000565 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
Vincent Lejeunef143af32013-11-11 22:10:24 +0000566 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000567 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000568 else
569 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
Vincent Lejeunef143af32013-11-11 22:10:24 +0000570 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000571 RegisterJNode, RegisterINode);
Vincent Lejeunef143af32013-11-11 22:10:24 +0000572 return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
573 SDValue(interp, 0), SDValue(interp, 1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000574 }
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000575 case AMDGPUIntrinsic::R600_tex:
576 case AMDGPUIntrinsic::R600_texc:
577 case AMDGPUIntrinsic::R600_txl:
578 case AMDGPUIntrinsic::R600_txlc:
579 case AMDGPUIntrinsic::R600_txb:
580 case AMDGPUIntrinsic::R600_txbc:
581 case AMDGPUIntrinsic::R600_txf:
582 case AMDGPUIntrinsic::R600_txq:
583 case AMDGPUIntrinsic::R600_ddx:
Vincent Lejeune6df39432013-10-02 16:00:33 +0000584 case AMDGPUIntrinsic::R600_ddy:
585 case AMDGPUIntrinsic::R600_ldptr: {
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000586 unsigned TextureOp;
587 switch (IntrinsicID) {
588 case AMDGPUIntrinsic::R600_tex:
589 TextureOp = 0;
590 break;
591 case AMDGPUIntrinsic::R600_texc:
592 TextureOp = 1;
593 break;
594 case AMDGPUIntrinsic::R600_txl:
595 TextureOp = 2;
596 break;
597 case AMDGPUIntrinsic::R600_txlc:
598 TextureOp = 3;
599 break;
600 case AMDGPUIntrinsic::R600_txb:
601 TextureOp = 4;
602 break;
603 case AMDGPUIntrinsic::R600_txbc:
604 TextureOp = 5;
605 break;
606 case AMDGPUIntrinsic::R600_txf:
607 TextureOp = 6;
608 break;
609 case AMDGPUIntrinsic::R600_txq:
610 TextureOp = 7;
611 break;
612 case AMDGPUIntrinsic::R600_ddx:
613 TextureOp = 8;
614 break;
615 case AMDGPUIntrinsic::R600_ddy:
616 TextureOp = 9;
617 break;
Vincent Lejeune6df39432013-10-02 16:00:33 +0000618 case AMDGPUIntrinsic::R600_ldptr:
619 TextureOp = 10;
620 break;
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000621 default:
622 llvm_unreachable("Unknow Texture Operation");
623 }
624
625 SDValue TexArgs[19] = {
626 DAG.getConstant(TextureOp, MVT::i32),
627 Op.getOperand(1),
628 DAG.getConstant(0, MVT::i32),
629 DAG.getConstant(1, MVT::i32),
630 DAG.getConstant(2, MVT::i32),
631 DAG.getConstant(3, MVT::i32),
632 Op.getOperand(2),
633 Op.getOperand(3),
634 Op.getOperand(4),
635 DAG.getConstant(0, MVT::i32),
636 DAG.getConstant(1, MVT::i32),
637 DAG.getConstant(2, MVT::i32),
638 DAG.getConstant(3, MVT::i32),
639 Op.getOperand(5),
640 Op.getOperand(6),
641 Op.getOperand(7),
642 Op.getOperand(8),
643 Op.getOperand(9),
644 Op.getOperand(10)
645 };
646 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
647 }
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000648 case AMDGPUIntrinsic::AMDGPU_dp4: {
649 SDValue Args[8] = {
650 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
651 DAG.getConstant(0, MVT::i32)),
652 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
653 DAG.getConstant(0, MVT::i32)),
654 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
655 DAG.getConstant(1, MVT::i32)),
656 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
657 DAG.getConstant(1, MVT::i32)),
658 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
659 DAG.getConstant(2, MVT::i32)),
660 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
661 DAG.getConstant(2, MVT::i32)),
662 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
663 DAG.getConstant(3, MVT::i32)),
664 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
665 DAG.getConstant(3, MVT::i32))
666 };
667 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8);
668 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000669
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000670 case Intrinsic::r600_read_ngroups_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000671 return LowerImplicitParameter(DAG, VT, DL, 0);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000672 case Intrinsic::r600_read_ngroups_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000673 return LowerImplicitParameter(DAG, VT, DL, 1);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000674 case Intrinsic::r600_read_ngroups_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000675 return LowerImplicitParameter(DAG, VT, DL, 2);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000676 case Intrinsic::r600_read_global_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000677 return LowerImplicitParameter(DAG, VT, DL, 3);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000678 case Intrinsic::r600_read_global_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000679 return LowerImplicitParameter(DAG, VT, DL, 4);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000680 case Intrinsic::r600_read_global_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000681 return LowerImplicitParameter(DAG, VT, DL, 5);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000682 case Intrinsic::r600_read_local_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000683 return LowerImplicitParameter(DAG, VT, DL, 6);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000684 case Intrinsic::r600_read_local_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000685 return LowerImplicitParameter(DAG, VT, DL, 7);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000686 case Intrinsic::r600_read_local_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000687 return LowerImplicitParameter(DAG, VT, DL, 8);
688
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000689 case Intrinsic::r600_read_tgid_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000690 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
691 AMDGPU::T1_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000692 case Intrinsic::r600_read_tgid_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000693 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
694 AMDGPU::T1_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000695 case Intrinsic::r600_read_tgid_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000696 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
697 AMDGPU::T1_Z, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000698 case Intrinsic::r600_read_tidig_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000699 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
700 AMDGPU::T0_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000701 case Intrinsic::r600_read_tidig_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000702 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
703 AMDGPU::T0_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000704 case Intrinsic::r600_read_tidig_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000705 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
706 AMDGPU::T0_Z, VT);
707 }
708 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
709 break;
710 }
711 } // end switch(Op.getOpcode())
712 return SDValue();
713}
714
/// Replace results of illegal-typed nodes with legalized custom-lowered
/// equivalents.  FP_TO_UINT, LOAD, and STORE reuse the LowerXXX helpers;
/// everything else is left to the generic legalizer (empty Results).
void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default: return;
  case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
    return;
  case ISD::LOAD: {
    SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
    // Push both the value (result 0) and the chain (result 1).
    Results.push_back(SDValue(Node, 0));
    Results.push_back(SDValue(Node, 1));
    // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
    // function
    DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
    return;
  }
  case ISD::STORE:
    SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
    Results.push_back(SDValue(Node, 0));
    return;
  }
}
737
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000738SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
739 // On hw >= R700, COS/SIN input must be between -1. and 1.
740 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
741 EVT VT = Op.getValueType();
742 SDValue Arg = Op.getOperand(0);
743 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
744 DAG.getNode(ISD::FADD, SDLoc(Op), VT,
745 DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
746 DAG.getConstantFP(0.15915494309, MVT::f32)),
747 DAG.getConstantFP(0.5, MVT::f32)));
748 unsigned TrigNode;
749 switch (Op.getOpcode()) {
750 case ISD::FCOS:
751 TrigNode = AMDGPUISD::COS_HW;
752 break;
753 case ISD::FSIN:
754 TrigNode = AMDGPUISD::SIN_HW;
755 break;
756 default:
757 llvm_unreachable("Wrong trig opcode");
758 }
759 SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
760 DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
761 DAG.getConstantFP(-0.5, MVT::f32)));
762 if (Gen >= AMDGPUSubtarget::R700)
763 return TrigVal;
764 // On R600 hw, COS/SIN input must be between -Pi and Pi.
765 return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
766 DAG.getConstantFP(3.14159265359, MVT::f32));
767}
768
Tom Stellard75aadc22012-12-11 21:25:42 +0000769SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
770 return DAG.getNode(
771 ISD::SETCC,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000772 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +0000773 MVT::i1,
774 Op, DAG.getConstantFP(0.0f, MVT::f32),
775 DAG.getCondCode(ISD::SETNE)
776 );
777}
778
Tom Stellard75aadc22012-12-11 21:25:42 +0000779SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000780 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +0000781 unsigned DwordOffset) const {
782 unsigned ByteOffset = DwordOffset * 4;
783 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +0000784 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +0000785
786 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
787 assert(isInt<16>(ByteOffset));
788
789 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
790 DAG.getConstant(ByteOffset, MVT::i32), // PTR
791 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
792 false, false, false, 0);
793}
794
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000795SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
796
797 MachineFunction &MF = DAG.getMachineFunction();
798 const AMDGPUFrameLowering *TFL =
799 static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
800
801 FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
802 assert(FIN);
803
804 unsigned FrameIndex = FIN->getIndex();
805 unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
806 return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
807}
808
Tom Stellard75aadc22012-12-11 21:25:42 +0000809bool R600TargetLowering::isZero(SDValue Op) const {
810 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
811 return Cst->isNullValue();
812 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
813 return CstFP->isZero();
814 } else {
815 return false;
816 }
817}
818
819SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +0000820 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000821 EVT VT = Op.getValueType();
822
823 SDValue LHS = Op.getOperand(0);
824 SDValue RHS = Op.getOperand(1);
825 SDValue True = Op.getOperand(2);
826 SDValue False = Op.getOperand(3);
827 SDValue CC = Op.getOperand(4);
828 SDValue Temp;
829
830 // LHS and RHS are guaranteed to be the same value type
831 EVT CompareVT = LHS.getValueType();
832
833 // Check if we can lower this to a native operation.
834
Tom Stellard2add82d2013-03-08 15:37:09 +0000835 // Try to lower to a SET* instruction:
836 //
837 // SET* can match the following patterns:
838 //
Tom Stellardcd428182013-09-28 02:50:38 +0000839 // select_cc f32, f32, -1, 0, cc_supported
840 // select_cc f32, f32, 1.0f, 0.0f, cc_supported
841 // select_cc i32, i32, -1, 0, cc_supported
Tom Stellard2add82d2013-03-08 15:37:09 +0000842 //
843
844 // Move hardware True/False values to the correct operand.
Tom Stellardcd428182013-09-28 02:50:38 +0000845 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
846 ISD::CondCode InverseCC =
847 ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
Tom Stellard5694d302013-09-28 02:50:43 +0000848 if (isHWTrueValue(False) && isHWFalseValue(True)) {
849 if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
850 std::swap(False, True);
851 CC = DAG.getCondCode(InverseCC);
852 } else {
853 ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
854 if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
855 std::swap(False, True);
856 std::swap(LHS, RHS);
857 CC = DAG.getCondCode(SwapInvCC);
858 }
859 }
Tom Stellard2add82d2013-03-08 15:37:09 +0000860 }
861
862 if (isHWTrueValue(True) && isHWFalseValue(False) &&
863 (CompareVT == VT || VT == MVT::i32)) {
864 // This can be matched by a SET* instruction.
865 return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
866 }
867
Tom Stellard75aadc22012-12-11 21:25:42 +0000868 // Try to lower to a CND* instruction:
Tom Stellard2add82d2013-03-08 15:37:09 +0000869 //
870 // CND* can match the following patterns:
871 //
Tom Stellardcd428182013-09-28 02:50:38 +0000872 // select_cc f32, 0.0, f32, f32, cc_supported
873 // select_cc f32, 0.0, i32, i32, cc_supported
874 // select_cc i32, 0, f32, f32, cc_supported
875 // select_cc i32, 0, i32, i32, cc_supported
Tom Stellard2add82d2013-03-08 15:37:09 +0000876 //
Tom Stellardcd428182013-09-28 02:50:38 +0000877
878 // Try to move the zero value to the RHS
879 if (isZero(LHS)) {
880 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
881 // Try swapping the operands
882 ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
883 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
884 std::swap(LHS, RHS);
885 CC = DAG.getCondCode(CCSwapped);
886 } else {
887 // Try inverting the conditon and then swapping the operands
888 ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
889 CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
890 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
891 std::swap(True, False);
892 std::swap(LHS, RHS);
893 CC = DAG.getCondCode(CCSwapped);
894 }
895 }
896 }
897 if (isZero(RHS)) {
898 SDValue Cond = LHS;
899 SDValue Zero = RHS;
Tom Stellard75aadc22012-12-11 21:25:42 +0000900 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
901 if (CompareVT != VT) {
902 // Bitcast True / False to the correct types. This will end up being
903 // a nop, but it allows us to define only a single pattern in the
904 // .TD files for each CND* instruction rather than having to have
905 // one pattern for integer True/False and one for fp True/False
906 True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
907 False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
908 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000909
910 switch (CCOpcode) {
911 case ISD::SETONE:
912 case ISD::SETUNE:
913 case ISD::SETNE:
Tom Stellard75aadc22012-12-11 21:25:42 +0000914 CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
915 Temp = True;
916 True = False;
917 False = Temp;
918 break;
919 default:
920 break;
921 }
922 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
923 Cond, Zero,
924 True, False,
925 DAG.getCondCode(CCOpcode));
926 return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
927 }
928
Tom Stellard75aadc22012-12-11 21:25:42 +0000929
930 // Possible Min/Max pattern
931 SDValue MinMax = LowerMinMax(Op, DAG);
932 if (MinMax.getNode()) {
933 return MinMax;
934 }
935
936 // If we make it this for it means we have no native instructions to handle
937 // this SELECT_CC, so we must lower it.
938 SDValue HWTrue, HWFalse;
939
940 if (CompareVT == MVT::f32) {
941 HWTrue = DAG.getConstantFP(1.0f, CompareVT);
942 HWFalse = DAG.getConstantFP(0.0f, CompareVT);
943 } else if (CompareVT == MVT::i32) {
944 HWTrue = DAG.getConstant(-1, CompareVT);
945 HWFalse = DAG.getConstant(0, CompareVT);
946 }
947 else {
948 assert(!"Unhandled value type in LowerSELECT_CC");
949 }
950
951 // Lower this unsupported SELECT_CC into a combination of two supported
952 // SELECT_CC operations.
953 SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
954
955 return DAG.getNode(ISD::SELECT_CC, DL, VT,
956 Cond, HWFalse,
957 True, False,
958 DAG.getCondCode(ISD::SETNE));
959}
960
/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
/// convert these pointers to a register index. Each register holds
/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
/// for indirect addressing.
SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
                                               unsigned StackWidth,
                                               SelectionDAG &DAG) const {
  // A stack width of N sub-registers means each register index spans
  // N * 4 bytes, so the byte address is shifted right by log2(4 * N).
  unsigned SRLPad;
  switch(StackWidth) {
  case 1:
    SRLPad = 2;
    break;
  case 2:
    SRLPad = 3;
    break;
  case 4:
    SRLPad = 4;
    break;
  default: llvm_unreachable("Invalid stack width");
  }

  return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
                     DAG.getConstant(SRLPad, MVT::i32));
}
986
/// Compute the register channel and pointer increment needed to address
/// element \p ElemIdx of a stack value, given that \p StackWidth of the 4
/// sub-registers per register are used for indirect addressing.
///
/// NOTE: \p PtrIncr is a *relative* increment. The callers (LowerLOAD /
/// LowerSTORE) call this once per element in ascending order and add PtrIncr
/// to a running pointer each iteration, so the increments accumulate. That is
/// why e.g. the StackWidth == 2 case fires its single increment only at
/// ElemIdx == 2: elements 2 and 3 then both use the already-advanced pointer.
void R600TargetLowering::getStackAddress(unsigned StackWidth,
                                         unsigned ElemIdx,
                                         unsigned &Channel,
                                         unsigned &PtrIncr) const {
  switch (StackWidth) {
  default:
  case 1:
    // One channel per register: every element after the first advances the
    // running pointer by one register.
    Channel = 0;
    if (ElemIdx > 0) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 2:
    // Two channels per register: elements 0,1 share the first register;
    // the pointer advances once when crossing to elements 2,3.
    Channel = ElemIdx % 2;
    if (ElemIdx == 2) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 4:
    // Four channels per register: all four elements fit in one register.
    Channel = ElemIdx;
    PtrIncr = 0;
    break;
  }
}
1015
Tom Stellard75aadc22012-12-11 21:25:42 +00001016SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001017 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001018 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1019 SDValue Chain = Op.getOperand(0);
1020 SDValue Value = Op.getOperand(1);
1021 SDValue Ptr = Op.getOperand(2);
1022
Tom Stellard2ffc3302013-08-26 15:05:44 +00001023 SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
Tom Stellardfbab8272013-08-16 01:12:11 +00001024 if (Result.getNode()) {
1025 return Result;
1026 }
1027
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001028 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
1029 if (StoreNode->isTruncatingStore()) {
1030 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001031 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001032 EVT MemVT = StoreNode->getMemoryVT();
1033 SDValue MaskConstant;
1034 if (MemVT == MVT::i8) {
1035 MaskConstant = DAG.getConstant(0xFF, MVT::i32);
1036 } else {
1037 assert(MemVT == MVT::i16);
1038 MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
1039 }
1040 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
1041 DAG.getConstant(2, MVT::i32));
1042 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
1043 DAG.getConstant(0x00000003, VT));
1044 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1045 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
1046 DAG.getConstant(3, VT));
1047 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1048 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
1049 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1050 // vector instead.
1051 SDValue Src[4] = {
1052 ShiftedValue,
1053 DAG.getConstant(0, MVT::i32),
1054 DAG.getConstant(0, MVT::i32),
1055 Mask
1056 };
1057 SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src, 4);
1058 SDValue Args[3] = { Chain, Input, DWordAddr };
1059 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
1060 Op->getVTList(), Args, 3, MemVT,
1061 StoreNode->getMemOperand());
1062 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
1063 Value.getValueType().bitsGE(MVT::i32)) {
1064 // Convert pointer from byte address to dword address.
1065 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1066 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
1067 Ptr, DAG.getConstant(2, MVT::i32)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001068
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001069 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
1070 assert(!"Truncated and indexed stores not supported yet");
1071 } else {
1072 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1073 }
1074 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001075 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001076 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001077
1078 EVT ValueVT = Value.getValueType();
1079
1080 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1081 return SDValue();
1082 }
1083
1084 // Lowering for indirect addressing
1085
1086 const MachineFunction &MF = DAG.getMachineFunction();
1087 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1088 getTargetMachine().getFrameLowering());
1089 unsigned StackWidth = TFL->getStackWidth(MF);
1090
1091 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1092
1093 if (ValueVT.isVector()) {
1094 unsigned NumElemVT = ValueVT.getVectorNumElements();
1095 EVT ElemVT = ValueVT.getVectorElementType();
1096 SDValue Stores[4];
1097
1098 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1099 "vector width in load");
1100
1101 for (unsigned i = 0; i < NumElemVT; ++i) {
1102 unsigned Channel, PtrIncr;
1103 getStackAddress(StackWidth, i, Channel, PtrIncr);
1104 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1105 DAG.getConstant(PtrIncr, MVT::i32));
1106 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
1107 Value, DAG.getConstant(i, MVT::i32));
1108
1109 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1110 Chain, Elem, Ptr,
1111 DAG.getTargetConstant(Channel, MVT::i32));
1112 }
1113 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
1114 } else {
1115 if (ValueVT == MVT::i8) {
1116 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1117 }
1118 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
NAKAMURA Takumi18ca09c2013-05-22 06:37:25 +00001119 DAG.getTargetConstant(0, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001120 }
1121
1122 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001123}
1124
Tom Stellard365366f2013-01-23 02:09:06 +00001125// return (512 + (kc_bank << 12)
1126static int
1127ConstantAddressBlock(unsigned AddressSpace) {
1128 switch (AddressSpace) {
1129 case AMDGPUAS::CONSTANT_BUFFER_0:
1130 return 512;
1131 case AMDGPUAS::CONSTANT_BUFFER_1:
1132 return 512 + 4096;
1133 case AMDGPUAS::CONSTANT_BUFFER_2:
1134 return 512 + 4096 * 2;
1135 case AMDGPUAS::CONSTANT_BUFFER_3:
1136 return 512 + 4096 * 3;
1137 case AMDGPUAS::CONSTANT_BUFFER_4:
1138 return 512 + 4096 * 4;
1139 case AMDGPUAS::CONSTANT_BUFFER_5:
1140 return 512 + 4096 * 5;
1141 case AMDGPUAS::CONSTANT_BUFFER_6:
1142 return 512 + 4096 * 6;
1143 case AMDGPUAS::CONSTANT_BUFFER_7:
1144 return 512 + 4096 * 7;
1145 case AMDGPUAS::CONSTANT_BUFFER_8:
1146 return 512 + 4096 * 8;
1147 case AMDGPUAS::CONSTANT_BUFFER_9:
1148 return 512 + 4096 * 9;
1149 case AMDGPUAS::CONSTANT_BUFFER_10:
1150 return 512 + 4096 * 10;
1151 case AMDGPUAS::CONSTANT_BUFFER_11:
1152 return 512 + 4096 * 11;
1153 case AMDGPUAS::CONSTANT_BUFFER_12:
1154 return 512 + 4096 * 12;
1155 case AMDGPUAS::CONSTANT_BUFFER_13:
1156 return 512 + 4096 * 13;
1157 case AMDGPUAS::CONSTANT_BUFFER_14:
1158 return 512 + 4096 * 14;
1159 case AMDGPUAS::CONSTANT_BUFFER_15:
1160 return 512 + 4096 * 15;
1161 default:
1162 return -1;
1163 }
1164}
1165
/// Custom lowering for loads: local vector loads are split, constant-buffer
/// loads become CONST_ADDRESS nodes (folded when the pointer is constant),
/// sign-extending loads are expanded to EXTLOAD + SHL/SRA, and private
/// (stack) loads become per-channel REGISTER_LOAD nodes.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  // Vector loads from local memory are split into smaller loads.
  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      SplitVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 && LoadNode->getExtensionType() != ISD::SEXTLOAD) {
    SDValue Result;
    if (isa<ConstantExpr>(LoadNode->getSrcValue()) ||
        isa<Constant>(LoadNode->getSrcValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT, Slots, NumElements);
    } else {
      // non constant ptr cant be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    // Scalar loads only need element 0 of the 4-element constant fetch.
    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
          DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    // Expand the SEXTLOAD as EXTLOAD followed by shift-left / arithmetic
    // shift-right to replicate the sign bit.
    SDValue ShiftAmount =
        DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                  LoadNode->getPointerInfo(), MemVT,
                                  LoadNode->isVolatile(),
                                  LoadNode->isNonTemporal(),
                                  LoadNode->getAlignment());
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);

    SDValue MergedValues[2] = { Sra, Chain };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    // Load each element through its own REGISTER_LOAD; channel / pointer
    // increment per element comes from getStackAddress. Unused lanes of the
    // 4-wide result are undef.
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, MVT::i32),
                             Op.getOperand(2));
    }
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2];
  Ops[0] = LoweredLoad;
  Ops[1] = Chain;

  return DAG.getMergeValues(Ops, 2, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001300
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
///
/// Compute-shader arguments are loaded from the implicit input buffer in
/// CONSTANT_BUFFER_0 (after a 36-byte header); non-compute shaders receive
/// their inputs in live-in vector registers.
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      SDLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned ShaderType = MF.getInfo<R600MachineFunctionInfo>()->ShaderType;

  SmallVector<ISD::InputArg, 8> LocalIns;

  // LocalIns mirrors Ins but carries the original (pre-legalization) argument
  // types, so LocalIns[i].VT below is the in-memory type of argument i.
  getOriginalFunctionArgs(DAG, DAG.getMachineFunction().getFunction(), Ins,
                          LocalIns);

  AnalyzeFormalArguments(CCInfo, LocalIns);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    EVT VT = Ins[i].VT;
    EVT MemVT = LocalIns[i].VT;

    // Non-compute shaders: arguments arrive in 128-bit registers.
    if (ShaderType != ShaderType::COMPUTE) {
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                                   AMDGPUAS::CONSTANT_BUFFER_0);

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.
    SDValue Arg = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, Chain,
                           DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
                           MachinePointerInfo(UndefValue::get(PtrTy)),
                           MemVT, false, false, 4);
                           // 4 is the preferred alignment for
                           // the CONSTANT memory space.
    InVals.push_back(Arg);
  }
  return Chain;
}
1351
Matt Arsenault758659232013-05-18 00:21:46 +00001352EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Tom Stellard75aadc22012-12-11 21:25:42 +00001353 if (!VT.isVector()) return MVT::i32;
1354 return VT.changeVectorElementTypeToInteger();
1355}
1356
/// Rewrite a 4-element BUILD_VECTOR so later swizzle selection can use the
/// special selectors: undef lanes become SEL_MASK_WRITE, constant 0.0 / 1.0
/// lanes become SEL_0 / SEL_1, and duplicated lanes are redirected to the
/// first occurrence. The old-index -> new-selector mapping is recorded in
/// \p RemapSwizzle (must be empty on entry).
static SDValue
CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
                        DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      // We mask write here to teach later passes that the ith element of this
      // vector is undef. Thus we can use it to reduce 128 bits reg usage,
      // break false dependencies and additionally make assembly easier to read.
      RemapSwizzle[i] = 7; // SEL_MASK_WRITE
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    // Lanes that are (now) undef take no part in duplicate elimination.
    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      continue;
    // Redirect a lane that repeats an earlier lane to that earlier slot.
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec, 4);
}
1399
Benjamin Kramer193960c2013-06-11 13:32:25 +00001400static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1401 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001402 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1403 assert(RemapSwizzle.empty());
1404 SDValue NewBldVec[4] = {
1405 VectorEntry.getOperand(0),
1406 VectorEntry.getOperand(1),
1407 VectorEntry.getOperand(2),
1408 VectorEntry.getOperand(3)
1409 };
1410 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001411 for (unsigned i = 0; i < 4; i++)
1412 RemapSwizzle[i] = i;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001413
1414 for (unsigned i = 0; i < 4; i++) {
1415 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1416 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1417 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001418 if (i == Idx) {
1419 isUnmovable[Idx] = true;
1420 continue;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001421 }
Vincent Lejeune301beb82013-10-13 17:56:04 +00001422 if (isUnmovable[Idx])
1423 continue;
1424 // Swap i and Idx
1425 std::swap(NewBldVec[Idx], NewBldVec[i]);
1426 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1427 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001428 }
1429 }
1430
1431 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
1432 VectorEntry.getValueType(), NewBldVec, 4);
1433}
1434
1435
1436SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1437SDValue Swz[4], SelectionDAG &DAG) const {
1438 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1439 // Old -> New swizzle values
1440 DenseMap<unsigned, unsigned> SwizzleRemap;
1441
1442 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1443 for (unsigned i = 0; i < 4; i++) {
1444 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1445 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1446 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1447 }
1448
1449 SwizzleRemap.clear();
1450 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1451 for (unsigned i = 0; i < 4; i++) {
1452 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1453 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1454 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1455 }
1456
1457 return BuildVector;
1458}
1459
1460
Tom Stellard75aadc22012-12-11 21:25:42 +00001461//===----------------------------------------------------------------------===//
1462// Custom DAG Optimizations
1463//===----------------------------------------------------------------------===//
1464
1465SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1466 DAGCombinerInfo &DCI) const {
1467 SelectionDAG &DAG = DCI.DAG;
1468
1469 switch (N->getOpcode()) {
1470 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1471 case ISD::FP_ROUND: {
1472 SDValue Arg = N->getOperand(0);
1473 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001474 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001475 Arg.getOperand(0));
1476 }
1477 break;
1478 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001479
1480 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1481 // (i32 select_cc f32, f32, -1, 0 cc)
1482 //
1483 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1484 // this to one of the SET*_DX10 instructions.
1485 case ISD::FP_TO_SINT: {
1486 SDValue FNeg = N->getOperand(0);
1487 if (FNeg.getOpcode() != ISD::FNEG) {
1488 return SDValue();
1489 }
1490 SDValue SelectCC = FNeg.getOperand(0);
1491 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1492 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1493 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1494 !isHWTrueValue(SelectCC.getOperand(2)) ||
1495 !isHWFalseValue(SelectCC.getOperand(3))) {
1496 return SDValue();
1497 }
1498
Andrew Trickef9de2a2013-05-25 02:42:55 +00001499 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001500 SelectCC.getOperand(0), // LHS
1501 SelectCC.getOperand(1), // RHS
1502 DAG.getConstant(-1, MVT::i32), // True
1503 DAG.getConstant(0, MVT::i32), // Flase
1504 SelectCC.getOperand(4)); // CC
1505
1506 break;
1507 }
Quentin Colombete2e05482013-07-30 00:27:16 +00001508
NAKAMURA Takumi8a046432013-10-28 04:07:38 +00001509 // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1510 // => build_vector elt0, ... , NewEltIdx, ... , eltN
Quentin Colombete2e05482013-07-30 00:27:16 +00001511 case ISD::INSERT_VECTOR_ELT: {
1512 SDValue InVec = N->getOperand(0);
1513 SDValue InVal = N->getOperand(1);
1514 SDValue EltNo = N->getOperand(2);
1515 SDLoc dl(N);
1516
1517 // If the inserted element is an UNDEF, just use the input vector.
1518 if (InVal.getOpcode() == ISD::UNDEF)
1519 return InVec;
1520
1521 EVT VT = InVec.getValueType();
1522
1523 // If we can't generate a legal BUILD_VECTOR, exit
1524 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1525 return SDValue();
1526
1527 // Check that we know which element is being inserted
1528 if (!isa<ConstantSDNode>(EltNo))
1529 return SDValue();
1530 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1531
1532 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1533 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1534 // vector elements.
1535 SmallVector<SDValue, 8> Ops;
1536 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1537 Ops.append(InVec.getNode()->op_begin(),
1538 InVec.getNode()->op_end());
1539 } else if (InVec.getOpcode() == ISD::UNDEF) {
1540 unsigned NElts = VT.getVectorNumElements();
1541 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1542 } else {
1543 return SDValue();
1544 }
1545
1546 // Insert the element
1547 if (Elt < Ops.size()) {
1548 // All the operands of BUILD_VECTOR must have the same type;
1549 // we enforce that here.
1550 EVT OpVT = Ops[0].getValueType();
1551 if (InVal.getValueType() != OpVT)
1552 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1553 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
1554 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
1555 Ops[Elt] = InVal;
1556 }
1557
1558 // Return the new vector
1559 return DAG.getNode(ISD::BUILD_VECTOR, dl,
1560 VT, &Ops[0], Ops.size());
1561 }
1562
Tom Stellard365366f2013-01-23 02:09:06 +00001563 // Extract_vec (Build_vector) generated by custom lowering
1564 // also needs to be customly combined
1565 case ISD::EXTRACT_VECTOR_ELT: {
1566 SDValue Arg = N->getOperand(0);
1567 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1568 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1569 unsigned Element = Const->getZExtValue();
1570 return Arg->getOperand(Element);
1571 }
1572 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001573 if (Arg.getOpcode() == ISD::BITCAST &&
1574 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1575 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1576 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001577 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00001578 Arg->getOperand(0).getOperand(Element));
1579 }
1580 }
Tom Stellard365366f2013-01-23 02:09:06 +00001581 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001582
1583 case ISD::SELECT_CC: {
1584 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1585 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001586 //
1587 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1588 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001589 SDValue LHS = N->getOperand(0);
1590 if (LHS.getOpcode() != ISD::SELECT_CC) {
1591 return SDValue();
1592 }
1593
1594 SDValue RHS = N->getOperand(1);
1595 SDValue True = N->getOperand(2);
1596 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001597 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001598
1599 if (LHS.getOperand(2).getNode() != True.getNode() ||
1600 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00001601 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00001602 return SDValue();
1603 }
1604
Tom Stellard5e524892013-03-08 15:37:11 +00001605 switch (NCC) {
1606 default: return SDValue();
1607 case ISD::SETNE: return LHS;
1608 case ISD::SETEQ: {
1609 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1610 LHSCC = ISD::getSetCCInverse(LHSCC,
1611 LHS.getOperand(0).getValueType().isInteger());
Tom Stellardcd428182013-09-28 02:50:38 +00001612 if (DCI.isBeforeLegalizeOps() ||
1613 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1614 return DAG.getSelectCC(SDLoc(N),
1615 LHS.getOperand(0),
1616 LHS.getOperand(1),
1617 LHS.getOperand(2),
1618 LHS.getOperand(3),
1619 LHSCC);
1620 break;
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001621 }
Tom Stellard5e524892013-03-08 15:37:11 +00001622 }
Tom Stellardcd428182013-09-28 02:50:38 +00001623 return SDValue();
Tom Stellard5e524892013-03-08 15:37:11 +00001624 }
Tom Stellardfbab8272013-08-16 01:12:11 +00001625
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001626 case AMDGPUISD::EXPORT: {
1627 SDValue Arg = N->getOperand(1);
1628 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1629 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001630
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001631 SDValue NewArgs[8] = {
1632 N->getOperand(0), // Chain
1633 SDValue(),
1634 N->getOperand(2), // ArrayBase
1635 N->getOperand(3), // Type
1636 N->getOperand(4), // SWZ_X
1637 N->getOperand(5), // SWZ_Y
1638 N->getOperand(6), // SWZ_Z
1639 N->getOperand(7) // SWZ_W
1640 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00001641 SDLoc DL(N);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001642 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001643 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
Tom Stellarde06163a2013-02-07 14:02:35 +00001644 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001645 case AMDGPUISD::TEXTURE_FETCH: {
1646 SDValue Arg = N->getOperand(1);
1647 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1648 break;
1649
1650 SDValue NewArgs[19] = {
1651 N->getOperand(0),
1652 N->getOperand(1),
1653 N->getOperand(2),
1654 N->getOperand(3),
1655 N->getOperand(4),
1656 N->getOperand(5),
1657 N->getOperand(6),
1658 N->getOperand(7),
1659 N->getOperand(8),
1660 N->getOperand(9),
1661 N->getOperand(10),
1662 N->getOperand(11),
1663 N->getOperand(12),
1664 N->getOperand(13),
1665 N->getOperand(14),
1666 N->getOperand(15),
1667 N->getOperand(16),
1668 N->getOperand(17),
1669 N->getOperand(18),
1670 };
1671 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
1672 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
1673 NewArgs, 19);
1674 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001675 }
1676 return SDValue();
1677}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001678
1679static bool
1680FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001681 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001682 const R600InstrInfo *TII =
1683 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
1684 if (!Src.isMachineOpcode())
1685 return false;
1686 switch (Src.getMachineOpcode()) {
1687 case AMDGPU::FNEG_R600:
1688 if (!Neg.getNode())
1689 return false;
1690 Src = Src.getOperand(0);
1691 Neg = DAG.getTargetConstant(1, MVT::i32);
1692 return true;
1693 case AMDGPU::FABS_R600:
1694 if (!Abs.getNode())
1695 return false;
1696 Src = Src.getOperand(0);
1697 Abs = DAG.getTargetConstant(1, MVT::i32);
1698 return true;
1699 case AMDGPU::CONST_COPY: {
1700 unsigned Opcode = ParentNode->getMachineOpcode();
1701 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1702
1703 if (!Sel.getNode())
1704 return false;
1705
1706 SDValue CstOffset = Src.getOperand(0);
1707 if (ParentNode->getValueType(0).isVector())
1708 return false;
1709
1710 // Gather constants values
1711 int SrcIndices[] = {
1712 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
1713 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
1714 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
1715 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1716 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1717 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1718 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
1719 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
1720 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
1721 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
1722 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
1723 };
1724 std::vector<unsigned> Consts;
1725 for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
1726 int OtherSrcIdx = SrcIndices[i];
1727 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
1728 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
1729 continue;
1730 if (HasDst) {
1731 OtherSrcIdx--;
1732 OtherSelIdx--;
1733 }
1734 if (RegisterSDNode *Reg =
1735 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
1736 if (Reg->getReg() == AMDGPU::ALU_CONST) {
1737 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(
1738 ParentNode->getOperand(OtherSelIdx));
1739 Consts.push_back(Cst->getZExtValue());
1740 }
1741 }
1742 }
1743
1744 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
1745 Consts.push_back(Cst->getZExtValue());
1746 if (!TII->fitsConstReadLimitations(Consts)) {
1747 return false;
1748 }
1749
1750 Sel = CstOffset;
1751 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
1752 return true;
1753 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001754 case AMDGPU::MOV_IMM_I32:
1755 case AMDGPU::MOV_IMM_F32: {
1756 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
1757 uint64_t ImmValue = 0;
1758
1759
1760 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
1761 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
1762 float FloatValue = FPC->getValueAPF().convertToFloat();
1763 if (FloatValue == 0.0) {
1764 ImmReg = AMDGPU::ZERO;
1765 } else if (FloatValue == 0.5) {
1766 ImmReg = AMDGPU::HALF;
1767 } else if (FloatValue == 1.0) {
1768 ImmReg = AMDGPU::ONE;
1769 } else {
1770 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
1771 }
1772 } else {
1773 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
1774 uint64_t Value = C->getZExtValue();
1775 if (Value == 0) {
1776 ImmReg = AMDGPU::ZERO;
1777 } else if (Value == 1) {
1778 ImmReg = AMDGPU::ONE_INT;
1779 } else {
1780 ImmValue = Value;
1781 }
1782 }
1783
1784 // Check that we aren't already using an immediate.
1785 // XXX: It's possible for an instruction to have more than one
1786 // immediate operand, but this is not supported yet.
1787 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
1788 if (!Imm.getNode())
1789 return false;
1790 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
1791 assert(C);
1792 if (C->getZExtValue())
1793 return false;
1794 Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
1795 }
1796 Src = DAG.getRegister(ImmReg, MVT::i32);
1797 return true;
1798 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001799 default:
1800 return false;
1801 }
1802}
1803
1804
1805/// \brief Fold the instructions after selecting them
1806SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
1807 SelectionDAG &DAG) const {
1808 const R600InstrInfo *TII =
1809 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
1810 if (!Node->isMachineOpcode())
1811 return Node;
1812 unsigned Opcode = Node->getMachineOpcode();
1813 SDValue FakeOp;
1814
1815 std::vector<SDValue> Ops;
1816 for(SDNode::op_iterator I = Node->op_begin(), E = Node->op_end();
1817 I != E; ++I)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00001818 Ops.push_back(*I);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001819
1820 if (Opcode == AMDGPU::DOT_4) {
1821 int OperandIdx[] = {
1822 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1823 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1824 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1825 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
1826 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
1827 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
1828 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
1829 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00001830 };
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001831 int NegIdx[] = {
1832 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
1833 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
1834 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
1835 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
1836 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
1837 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
1838 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
1839 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
1840 };
1841 int AbsIdx[] = {
1842 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
1843 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
1844 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
1845 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
1846 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
1847 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
1848 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
1849 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
1850 };
1851 for (unsigned i = 0; i < 8; i++) {
1852 if (OperandIdx[i] < 0)
1853 return Node;
1854 SDValue &Src = Ops[OperandIdx[i] - 1];
1855 SDValue &Neg = Ops[NegIdx[i] - 1];
1856 SDValue &Abs = Ops[AbsIdx[i] - 1];
1857 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1858 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
1859 if (HasDst)
1860 SelIdx--;
1861 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001862 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
1863 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1864 }
1865 } else if (Opcode == AMDGPU::REG_SEQUENCE) {
1866 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
1867 SDValue &Src = Ops[i];
1868 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001869 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1870 }
Vincent Lejeune0167a312013-09-12 23:45:00 +00001871 } else if (Opcode == AMDGPU::CLAMP_R600) {
1872 SDValue Src = Node->getOperand(0);
1873 if (!Src.isMachineOpcode() ||
1874 !TII->hasInstrModifiers(Src.getMachineOpcode()))
1875 return Node;
1876 int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
1877 AMDGPU::OpName::clamp);
1878 if (ClampIdx < 0)
1879 return Node;
1880 std::vector<SDValue> Ops;
1881 unsigned NumOp = Src.getNumOperands();
1882 for(unsigned i = 0; i < NumOp; ++i)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00001883 Ops.push_back(Src.getOperand(i));
Vincent Lejeune0167a312013-09-12 23:45:00 +00001884 Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
1885 return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
1886 Node->getVTList(), Ops);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001887 } else {
1888 if (!TII->hasInstrModifiers(Opcode))
1889 return Node;
1890 int OperandIdx[] = {
1891 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
1892 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
1893 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
1894 };
1895 int NegIdx[] = {
1896 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
1897 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
1898 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
1899 };
1900 int AbsIdx[] = {
1901 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
1902 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
1903 -1
1904 };
1905 for (unsigned i = 0; i < 3; i++) {
1906 if (OperandIdx[i] < 0)
1907 return Node;
1908 SDValue &Src = Ops[OperandIdx[i] - 1];
1909 SDValue &Neg = Ops[NegIdx[i] - 1];
1910 SDValue FakeAbs;
1911 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
1912 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1913 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001914 int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
1915 if (HasDst) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001916 SelIdx--;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001917 ImmIdx--;
1918 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001919 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001920 SDValue &Imm = Ops[ImmIdx];
1921 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001922 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1923 }
1924 }
1925
1926 return Node;
1927}