blob: ce6ac891ebffdd900b5e247eb0944fa497bb3b55 [file] [log] [blame]
//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
16#include "R600Defines.h"
17#include "R600InstrInfo.h"
18#include "R600MachineFunctionInfo.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000019#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000020#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000021#include "llvm/CodeGen/MachineInstrBuilder.h"
22#include "llvm/CodeGen/MachineRegisterInfo.h"
23#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000024#include "llvm/IR/Argument.h"
25#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000026
27using namespace llvm;
28
29R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000030 AMDGPUTargetLowering(TM),
31 Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000032 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
33 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
34 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
35 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000036 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
37 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
38
Tom Stellard75aadc22012-12-11 21:25:42 +000039 computeRegisterProperties();
40
41 setOperationAction(ISD::FADD, MVT::v4f32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000042 setOperationAction(ISD::FADD, MVT::v2f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000043 setOperationAction(ISD::FMUL, MVT::v4f32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000044 setOperationAction(ISD::FMUL, MVT::v2f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000045 setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000046 setOperationAction(ISD::FDIV, MVT::v2f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000047 setOperationAction(ISD::FSUB, MVT::v4f32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000048 setOperationAction(ISD::FSUB, MVT::v2f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000049
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000050 setOperationAction(ISD::FCOS, MVT::f32, Custom);
51 setOperationAction(ISD::FSIN, MVT::f32, Custom);
52
Tom Stellard75aadc22012-12-11 21:25:42 +000053 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000054 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000055
Tom Stellard492ebea2013-03-08 15:37:07 +000056 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
57 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000058
59 setOperationAction(ISD::FSUB, MVT::f32, Expand);
60
61 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
62 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
63 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000064
Tom Stellard75aadc22012-12-11 21:25:42 +000065 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
66 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
67
Tom Stellarde8f9f282013-03-08 15:37:05 +000068 setOperationAction(ISD::SETCC, MVT::i32, Expand);
69 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000070 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
71
72 setOperationAction(ISD::SELECT, MVT::i32, Custom);
73 setOperationAction(ISD::SELECT, MVT::f32, Custom);
74
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000075 // Legalize loads and stores to the private address space.
76 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +000077 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000078 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellard1e803092013-07-23 01:48:18 +000079 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
80 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
81 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
82 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000083 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000084 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +000085 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000086 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
87
Tom Stellard365366f2013-01-23 02:09:06 +000088 setOperationAction(ISD::LOAD, MVT::i32, Custom);
89 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000090 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
91
Tom Stellard75aadc22012-12-11 21:25:42 +000092 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +000093 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +000094 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +000095 setTargetDAGCombine(ISD::SELECT_CC);
Quentin Colombete2e05482013-07-30 00:27:16 +000096 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +000097
Michel Danzer49812b52013-07-10 16:37:07 +000098 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
99
Tom Stellardb852af52013-03-08 15:37:03 +0000100 setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard87047f62013-04-24 23:56:18 +0000101 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard75aadc22012-12-11 21:25:42 +0000102 setSchedulingPreference(Sched::VLIW);
103}
104
/// \brief Expand pseudo instructions marked usesCustomInserter into real
/// R600 machine instructions.
///
/// \p MI is the pseudo being expanded inside \p BB; unless a case returns
/// early, the pseudo is erased at the bottom and \p BB is returned.
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
          static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());

  switch (MI->getOpcode()) {
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  // CLAMP/FABS/FNEG pseudos all lower to a plain MOV; the source modifier
  // is carried as an instruction flag on operand 0.
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                   AMDGPU::MOV,
                                                   MI->getOperand(0).getReg(),
                                                   MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    // Nothing is emitted for the pseudo itself; instead the instruction
    // that defines the masked register gets the MASK flag.
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::LDS_READ_RET: {
    // Re-emit the LDS read so that it defines the OQAP queue register,
    // then copy OQAP into the pseudo's original destination.
    MachineInstrBuilder NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                                        TII->get(MI->getOpcode()),
                                        AMDGPU::OQAP);
    for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
      NewMI.addOperand(MI->getOperand(i));
    }
    TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV,
                                 MI->getOperand(0).getReg(),
                                 AMDGPU::OQAP);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    // Float immediates are materialized from their raw bit pattern.
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    // Read from the constant bank: MOV from ALU_CONST, with the constant
    // selector placed in the src0_sel immediate operand.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
        MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    // If the next instruction is a RETURN, fold the end-of-program bit
    // into this RAT write.
    unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    // Texture fetch with explicit derivatives: load the H/V gradients into
    // two fresh 128-bit temporaries, then issue the gradient sample that
    // implicitly uses them.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    // Default swizzle (X,Y,Z,W) and coordinate-type flags; adjusted per
    // texture target below.
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            // Implicit uses keep the gradient writes alive and ordered
            // before the sample.
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    // Same expansion as TXD but using the shadow-comparison sampler
    // (TEX_SAMPLE_C_G).
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
              .addOperand(MI->getOperand(0));
      break;

  case AMDGPU::BRANCH_COND_f32: {
    // Conditional branch: PRED_X computes the predicate (pushed onto the
    // hardware predicate stack via MO_FLAG_PUSH), JUMP_COND consumes it.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    // Same as the f32 variant but with the integer zero-compare opcode.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
            AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO_INT)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
           .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if its not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = llvm::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // CF instruction encoding differs between Evergreen (84) and R600 (40).
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  MI->eraseFromParent();
  return BB;
}
476
477//===----------------------------------------------------------------------===//
478// Custom DAG Lowering Operations
479//===----------------------------------------------------------------------===//
480
/// \brief Dispatch custom lowering for the operations registered Custom in
/// the constructor, including the R600/AMDGPU intrinsic families.
///
/// Returns an empty SDValue for intrinsic cases that fall through (no
/// custom lowering needed).
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::FCOS:
  case ISD::FSIN: return LowerTrig(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::SELECT: return LowerSELECT(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: return LowerLOAD(Op, DAG);
  case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      // Record the output register as live-out (consumed by the RETURN
      // expansion) and copy the value into it.
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MFI->LiveOuts.push_back(Reg);
      return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
    }
    case AMDGPUIntrinsic::R600_store_swizzle: {
      // Identity swizzle (X,Y,Z,W); a DAG combine may tighten it later.
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, MVT::i32), // SWZ_X
        DAG.getConstant(1, MVT::i32), // SWZ_Y
        DAG.getConstant(2, MVT::i32), // SWZ_Z
        DAG.getConstant(3, MVT::i32) // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(),
          Args, 8);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch(IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      // Shader input: mark the register live-in and read it from the
      // function entry.
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      MRI.addLiveIn(Reg);
      return DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), Reg, VT);
    }

    case AMDGPUIntrinsic::R600_interp_input: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
      MachineSDNode *interp;
      if (ijb < 0) {
        // Negative I/J-buffer index: constant (flat) interpolation — load
        // the parameter vector and extract the requested channel.
        const MachineFunction &MF = DAG.getMachineFunction();
        const R600InstrInfo *TII =
          static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
        interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
            MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
        return DAG.getTargetExtractSubreg(
            TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
            DL, MVT::f32, SDValue(interp, 0));
      }

      // Barycentric I/J live in a fixed pair of registers selected by ijb.
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
      unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
      MRI.addLiveIn(RegisterI);
      MRI.addLiveIn(RegisterJ);
      SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
      SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);

      // Each INTERP_PAIR_* computes two channels; pick XY or ZW by slot.
      if (slot % 4 < 2)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            RegisterJNode, RegisterINode);
      return SDValue(interp, slot % 2);
    }
    case AMDGPUIntrinsic::R600_tex:
    case AMDGPUIntrinsic::R600_texc:
    case AMDGPUIntrinsic::R600_txl:
    case AMDGPUIntrinsic::R600_txlc:
    case AMDGPUIntrinsic::R600_txb:
    case AMDGPUIntrinsic::R600_txbc:
    case AMDGPUIntrinsic::R600_txf:
    case AMDGPUIntrinsic::R600_txq:
    case AMDGPUIntrinsic::R600_ddx:
    case AMDGPUIntrinsic::R600_ddy: {
      // All texture intrinsics lower to a single TEXTURE_FETCH node whose
      // first operand encodes the operation kind.
      unsigned TextureOp;
      switch (IntrinsicID) {
      case AMDGPUIntrinsic::R600_tex:
        TextureOp = 0;
        break;
      case AMDGPUIntrinsic::R600_texc:
        TextureOp = 1;
        break;
      case AMDGPUIntrinsic::R600_txl:
        TextureOp = 2;
        break;
      case AMDGPUIntrinsic::R600_txlc:
        TextureOp = 3;
        break;
      case AMDGPUIntrinsic::R600_txb:
        TextureOp = 4;
        break;
      case AMDGPUIntrinsic::R600_txbc:
        TextureOp = 5;
        break;
      case AMDGPUIntrinsic::R600_txf:
        TextureOp = 6;
        break;
      case AMDGPUIntrinsic::R600_txq:
        TextureOp = 7;
        break;
      case AMDGPUIntrinsic::R600_ddx:
        TextureOp = 8;
        break;
      case AMDGPUIntrinsic::R600_ddy:
        TextureOp = 9;
        break;
      default:
        llvm_unreachable("Unknow Texture Operation");
      }

      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
    }
    case AMDGPUIntrinsic::AMDGPU_dp4: {
      // Dot product: feed the interleaved lane pairs (a.x,b.x,...) to DOT4.
      SDValue Args[8] = {
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(0, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(0, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(1, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(1, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(2, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(2, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(3, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(3, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8);
    }

    // Dispatch-size parameters are loaded from the implicit kernel
    // parameter buffer at fixed dword offsets.
    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    // Group and thread IDs come pre-loaded in fixed T0/T1 channels.
    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}
714
715void R600TargetLowering::ReplaceNodeResults(SDNode *N,
716 SmallVectorImpl<SDValue> &Results,
717 SelectionDAG &DAG) const {
718 switch (N->getOpcode()) {
719 default: return;
720 case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
Tom Stellard365366f2013-01-23 02:09:06 +0000721 return;
722 case ISD::LOAD: {
723 SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
724 Results.push_back(SDValue(Node, 0));
725 Results.push_back(SDValue(Node, 1));
726 // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
727 // function
728 DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
729 return;
730 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000731 case ISD::STORE:
732 SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
733 Results.push_back(SDValue(Node, 0));
734 return;
Tom Stellard75aadc22012-12-11 21:25:42 +0000735 }
736}
737
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000738SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
739 // On hw >= R700, COS/SIN input must be between -1. and 1.
740 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
741 EVT VT = Op.getValueType();
742 SDValue Arg = Op.getOperand(0);
743 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
744 DAG.getNode(ISD::FADD, SDLoc(Op), VT,
745 DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
746 DAG.getConstantFP(0.15915494309, MVT::f32)),
747 DAG.getConstantFP(0.5, MVT::f32)));
748 unsigned TrigNode;
749 switch (Op.getOpcode()) {
750 case ISD::FCOS:
751 TrigNode = AMDGPUISD::COS_HW;
752 break;
753 case ISD::FSIN:
754 TrigNode = AMDGPUISD::SIN_HW;
755 break;
756 default:
757 llvm_unreachable("Wrong trig opcode");
758 }
759 SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
760 DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
761 DAG.getConstantFP(-0.5, MVT::f32)));
762 if (Gen >= AMDGPUSubtarget::R700)
763 return TrigVal;
764 // On R600 hw, COS/SIN input must be between -Pi and Pi.
765 return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
766 DAG.getConstantFP(3.14159265359, MVT::f32));
767}
768
Tom Stellard75aadc22012-12-11 21:25:42 +0000769SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
770 return DAG.getNode(
771 ISD::SETCC,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000772 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +0000773 MVT::i1,
774 Op, DAG.getConstantFP(0.0f, MVT::f32),
775 DAG.getCondCode(ISD::SETNE)
776 );
777}
778
Tom Stellard75aadc22012-12-11 21:25:42 +0000779SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000780 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +0000781 unsigned DwordOffset) const {
782 unsigned ByteOffset = DwordOffset * 4;
783 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +0000784 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +0000785
786 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
787 assert(isInt<16>(ByteOffset));
788
789 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
790 DAG.getConstant(ByteOffset, MVT::i32), // PTR
791 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
792 false, false, false, 0);
793}
794
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000795SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
796
797 MachineFunction &MF = DAG.getMachineFunction();
798 const AMDGPUFrameLowering *TFL =
799 static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
800
801 FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
802 assert(FIN);
803
804 unsigned FrameIndex = FIN->getIndex();
805 unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
806 return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
807}
808
Tom Stellard75aadc22012-12-11 21:25:42 +0000809bool R600TargetLowering::isZero(SDValue Op) const {
810 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
811 return Cst->isNullValue();
812 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
813 return CstFP->isZero();
814 } else {
815 return false;
816 }
817}
818
819SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +0000820 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000821 EVT VT = Op.getValueType();
822
823 SDValue LHS = Op.getOperand(0);
824 SDValue RHS = Op.getOperand(1);
825 SDValue True = Op.getOperand(2);
826 SDValue False = Op.getOperand(3);
827 SDValue CC = Op.getOperand(4);
828 SDValue Temp;
829
830 // LHS and RHS are guaranteed to be the same value type
831 EVT CompareVT = LHS.getValueType();
832
833 // Check if we can lower this to a native operation.
834
Tom Stellard2add82d2013-03-08 15:37:09 +0000835 // Try to lower to a SET* instruction:
836 //
837 // SET* can match the following patterns:
838 //
839 // select_cc f32, f32, -1, 0, cc_any
840 // select_cc f32, f32, 1.0f, 0.0f, cc_any
841 // select_cc i32, i32, -1, 0, cc_any
842 //
843
844 // Move hardware True/False values to the correct operand.
845 if (isHWTrueValue(False) && isHWFalseValue(True)) {
846 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
847 std::swap(False, True);
848 CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
849 }
850
851 if (isHWTrueValue(True) && isHWFalseValue(False) &&
852 (CompareVT == VT || VT == MVT::i32)) {
853 // This can be matched by a SET* instruction.
854 return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
855 }
856
Tom Stellard75aadc22012-12-11 21:25:42 +0000857 // Try to lower to a CND* instruction:
Tom Stellard2add82d2013-03-08 15:37:09 +0000858 //
859 // CND* can match the following patterns:
860 //
861 // select_cc f32, 0.0, f32, f32, cc_any
862 // select_cc f32, 0.0, i32, i32, cc_any
863 // select_cc i32, 0, f32, f32, cc_any
864 // select_cc i32, 0, i32, i32, cc_any
865 //
Tom Stellard75aadc22012-12-11 21:25:42 +0000866 if (isZero(LHS) || isZero(RHS)) {
867 SDValue Cond = (isZero(LHS) ? RHS : LHS);
868 SDValue Zero = (isZero(LHS) ? LHS : RHS);
869 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
870 if (CompareVT != VT) {
871 // Bitcast True / False to the correct types. This will end up being
872 // a nop, but it allows us to define only a single pattern in the
873 // .TD files for each CND* instruction rather than having to have
874 // one pattern for integer True/False and one for fp True/False
875 True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
876 False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
877 }
878 if (isZero(LHS)) {
879 CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
880 }
881
882 switch (CCOpcode) {
883 case ISD::SETONE:
884 case ISD::SETUNE:
885 case ISD::SETNE:
886 case ISD::SETULE:
887 case ISD::SETULT:
888 case ISD::SETOLE:
889 case ISD::SETOLT:
890 case ISD::SETLE:
891 case ISD::SETLT:
892 CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
893 Temp = True;
894 True = False;
895 False = Temp;
896 break;
897 default:
898 break;
899 }
900 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
901 Cond, Zero,
902 True, False,
903 DAG.getCondCode(CCOpcode));
904 return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
905 }
906
Tom Stellard75aadc22012-12-11 21:25:42 +0000907
908 // Possible Min/Max pattern
909 SDValue MinMax = LowerMinMax(Op, DAG);
910 if (MinMax.getNode()) {
911 return MinMax;
912 }
913
914 // If we make it this for it means we have no native instructions to handle
915 // this SELECT_CC, so we must lower it.
916 SDValue HWTrue, HWFalse;
917
918 if (CompareVT == MVT::f32) {
919 HWTrue = DAG.getConstantFP(1.0f, CompareVT);
920 HWFalse = DAG.getConstantFP(0.0f, CompareVT);
921 } else if (CompareVT == MVT::i32) {
922 HWTrue = DAG.getConstant(-1, CompareVT);
923 HWFalse = DAG.getConstant(0, CompareVT);
924 }
925 else {
926 assert(!"Unhandled value type in LowerSELECT_CC");
927 }
928
929 // Lower this unsupported SELECT_CC into a combination of two supported
930 // SELECT_CC operations.
931 SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
932
933 return DAG.getNode(ISD::SELECT_CC, DL, VT,
934 Cond, HWFalse,
935 True, False,
936 DAG.getCondCode(ISD::SETNE));
937}
938
939SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
940 return DAG.getNode(ISD::SELECT_CC,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000941 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +0000942 Op.getValueType(),
943 Op.getOperand(0),
944 DAG.getConstant(0, MVT::i32),
945 Op.getOperand(1),
946 Op.getOperand(2),
947 DAG.getCondCode(ISD::SETNE));
948}
949
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000950/// LLVM generates byte-addresed pointers. For indirect addressing, we need to
951/// convert these pointers to a register index. Each register holds
952/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
953/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
954/// for indirect addressing.
955SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
956 unsigned StackWidth,
957 SelectionDAG &DAG) const {
958 unsigned SRLPad;
959 switch(StackWidth) {
960 case 1:
961 SRLPad = 2;
962 break;
963 case 2:
964 SRLPad = 3;
965 break;
966 case 4:
967 SRLPad = 4;
968 break;
969 default: llvm_unreachable("Invalid stack width");
970 }
971
Andrew Trickef9de2a2013-05-25 02:42:55 +0000972 return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000973 DAG.getConstant(SRLPad, MVT::i32));
974}
975
976void R600TargetLowering::getStackAddress(unsigned StackWidth,
977 unsigned ElemIdx,
978 unsigned &Channel,
979 unsigned &PtrIncr) const {
980 switch (StackWidth) {
981 default:
982 case 1:
983 Channel = 0;
984 if (ElemIdx > 0) {
985 PtrIncr = 1;
986 } else {
987 PtrIncr = 0;
988 }
989 break;
990 case 2:
991 Channel = ElemIdx % 2;
992 if (ElemIdx == 2) {
993 PtrIncr = 1;
994 } else {
995 PtrIncr = 0;
996 }
997 break;
998 case 4:
999 Channel = ElemIdx;
1000 PtrIncr = 0;
1001 break;
1002 }
1003}
1004
Tom Stellard75aadc22012-12-11 21:25:42 +00001005SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001006 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001007 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1008 SDValue Chain = Op.getOperand(0);
1009 SDValue Value = Op.getOperand(1);
1010 SDValue Ptr = Op.getOperand(2);
1011
1012 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
1013 Ptr->getOpcode() != AMDGPUISD::DWORDADDR) {
1014 // Convert pointer from byte address to dword address.
1015 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1016 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
1017 Ptr, DAG.getConstant(2, MVT::i32)));
1018
1019 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
1020 assert(!"Truncated and indexed stores not supported yet");
1021 } else {
1022 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1023 }
1024 return Chain;
1025 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001026
1027 EVT ValueVT = Value.getValueType();
1028
1029 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1030 return SDValue();
1031 }
1032
1033 // Lowering for indirect addressing
1034
1035 const MachineFunction &MF = DAG.getMachineFunction();
1036 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1037 getTargetMachine().getFrameLowering());
1038 unsigned StackWidth = TFL->getStackWidth(MF);
1039
1040 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1041
1042 if (ValueVT.isVector()) {
1043 unsigned NumElemVT = ValueVT.getVectorNumElements();
1044 EVT ElemVT = ValueVT.getVectorElementType();
1045 SDValue Stores[4];
1046
1047 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1048 "vector width in load");
1049
1050 for (unsigned i = 0; i < NumElemVT; ++i) {
1051 unsigned Channel, PtrIncr;
1052 getStackAddress(StackWidth, i, Channel, PtrIncr);
1053 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1054 DAG.getConstant(PtrIncr, MVT::i32));
1055 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
1056 Value, DAG.getConstant(i, MVT::i32));
1057
1058 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1059 Chain, Elem, Ptr,
1060 DAG.getTargetConstant(Channel, MVT::i32));
1061 }
1062 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
1063 } else {
1064 if (ValueVT == MVT::i8) {
1065 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1066 }
1067 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
NAKAMURA Takumi18ca09c2013-05-22 06:37:25 +00001068 DAG.getTargetConstant(0, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001069 }
1070
1071 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001072}
1073
Tom Stellard365366f2013-01-23 02:09:06 +00001074// return (512 + (kc_bank << 12)
1075static int
1076ConstantAddressBlock(unsigned AddressSpace) {
1077 switch (AddressSpace) {
1078 case AMDGPUAS::CONSTANT_BUFFER_0:
1079 return 512;
1080 case AMDGPUAS::CONSTANT_BUFFER_1:
1081 return 512 + 4096;
1082 case AMDGPUAS::CONSTANT_BUFFER_2:
1083 return 512 + 4096 * 2;
1084 case AMDGPUAS::CONSTANT_BUFFER_3:
1085 return 512 + 4096 * 3;
1086 case AMDGPUAS::CONSTANT_BUFFER_4:
1087 return 512 + 4096 * 4;
1088 case AMDGPUAS::CONSTANT_BUFFER_5:
1089 return 512 + 4096 * 5;
1090 case AMDGPUAS::CONSTANT_BUFFER_6:
1091 return 512 + 4096 * 6;
1092 case AMDGPUAS::CONSTANT_BUFFER_7:
1093 return 512 + 4096 * 7;
1094 case AMDGPUAS::CONSTANT_BUFFER_8:
1095 return 512 + 4096 * 8;
1096 case AMDGPUAS::CONSTANT_BUFFER_9:
1097 return 512 + 4096 * 9;
1098 case AMDGPUAS::CONSTANT_BUFFER_10:
1099 return 512 + 4096 * 10;
1100 case AMDGPUAS::CONSTANT_BUFFER_11:
1101 return 512 + 4096 * 11;
1102 case AMDGPUAS::CONSTANT_BUFFER_12:
1103 return 512 + 4096 * 12;
1104 case AMDGPUAS::CONSTANT_BUFFER_13:
1105 return 512 + 4096 * 13;
1106 case AMDGPUAS::CONSTANT_BUFFER_14:
1107 return 512 + 4096 * 14;
1108 case AMDGPUAS::CONSTANT_BUFFER_15:
1109 return 512 + 4096 * 15;
1110 default:
1111 return -1;
1112 }
1113}
1114
1115SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
1116{
1117 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001118 SDLoc DL(Op);
Tom Stellard365366f2013-01-23 02:09:06 +00001119 LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
1120 SDValue Chain = Op.getOperand(0);
1121 SDValue Ptr = Op.getOperand(1);
1122 SDValue LoweredLoad;
1123
1124 int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
1125 if (ConstantBlock > -1) {
1126 SDValue Result;
1127 if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
Vincent Lejeune743dca02013-03-05 15:04:29 +00001128 dyn_cast<Constant>(LoadNode->getSrcValue()) ||
1129 dyn_cast<ConstantSDNode>(Ptr)) {
Tom Stellard365366f2013-01-23 02:09:06 +00001130 SDValue Slots[4];
1131 for (unsigned i = 0; i < 4; i++) {
1132 // We want Const position encoded with the following formula :
1133 // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
1134 // const_index is Ptr computed by llvm using an alignment of 16.
1135 // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
1136 // then div by 4 at the ISel step
1137 SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
1138 DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
1139 Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
1140 }
Tom Stellard0344cdf2013-08-01 15:23:42 +00001141 EVT NewVT = MVT::v4i32;
1142 unsigned NumElements = 4;
1143 if (VT.isVector()) {
1144 NewVT = VT;
1145 NumElements = VT.getVectorNumElements();
1146 }
1147 Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT, Slots, NumElements);
Tom Stellard365366f2013-01-23 02:09:06 +00001148 } else {
1149 // non constant ptr cant be folded, keeps it as a v4f32 load
1150 Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
Vincent Lejeune743dca02013-03-05 15:04:29 +00001151 DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
Christian Konig189357c2013-03-07 09:03:59 +00001152 DAG.getConstant(LoadNode->getAddressSpace() -
NAKAMURA Takumi18ca09c2013-05-22 06:37:25 +00001153 AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
Tom Stellard365366f2013-01-23 02:09:06 +00001154 );
1155 }
1156
1157 if (!VT.isVector()) {
1158 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
1159 DAG.getConstant(0, MVT::i32));
1160 }
1161
1162 SDValue MergedValues[2] = {
1163 Result,
1164 Chain
1165 };
1166 return DAG.getMergeValues(MergedValues, 2, DL);
1167 }
1168
Tom Stellard84021442013-07-23 01:48:24 +00001169 // For most operations returning SDValue() will result int he node being
1170 // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so
1171 // we need to manually expand loads that may be legal in some address spaces
1172 // and illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported
1173 // for compute shaders, since the data is sign extended when it is uploaded
1174 // to the buffer. Howerver SEXT loads from other addresspaces are not
1175 // supported, so we need to expand them here.
1176 if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
1177 EVT MemVT = LoadNode->getMemoryVT();
1178 assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
1179 SDValue ShiftAmount =
1180 DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
1181 SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
1182 LoadNode->getPointerInfo(), MemVT,
1183 LoadNode->isVolatile(),
1184 LoadNode->isNonTemporal(),
1185 LoadNode->getAlignment());
1186 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
1187 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);
1188
1189 SDValue MergedValues[2] = { Sra, Chain };
1190 return DAG.getMergeValues(MergedValues, 2, DL);
1191 }
1192
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001193 if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1194 return SDValue();
1195 }
1196
1197 // Lowering for indirect addressing
1198 const MachineFunction &MF = DAG.getMachineFunction();
1199 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1200 getTargetMachine().getFrameLowering());
1201 unsigned StackWidth = TFL->getStackWidth(MF);
1202
1203 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1204
1205 if (VT.isVector()) {
1206 unsigned NumElemVT = VT.getVectorNumElements();
1207 EVT ElemVT = VT.getVectorElementType();
1208 SDValue Loads[4];
1209
1210 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1211 "vector width in load");
1212
1213 for (unsigned i = 0; i < NumElemVT; ++i) {
1214 unsigned Channel, PtrIncr;
1215 getStackAddress(StackWidth, i, Channel, PtrIncr);
1216 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1217 DAG.getConstant(PtrIncr, MVT::i32));
1218 Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
1219 Chain, Ptr,
1220 DAG.getTargetConstant(Channel, MVT::i32),
1221 Op.getOperand(2));
1222 }
1223 for (unsigned i = NumElemVT; i < 4; ++i) {
1224 Loads[i] = DAG.getUNDEF(ElemVT);
1225 }
1226 EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
1227 LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
1228 } else {
1229 LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
1230 Chain, Ptr,
1231 DAG.getTargetConstant(0, MVT::i32), // Channel
1232 Op.getOperand(2));
1233 }
1234
1235 SDValue Ops[2];
1236 Ops[0] = LoweredLoad;
1237 Ops[1] = Chain;
1238
1239 return DAG.getMergeValues(Ops, 2, DL);
Tom Stellard365366f2013-01-23 02:09:06 +00001240}
Tom Stellard75aadc22012-12-11 21:25:42 +00001241
Tom Stellard75aadc22012-12-11 21:25:42 +00001242/// XXX Only kernel functions are supported, so we can assume for now that
1243/// every function is a kernel function, but in the future we should use
1244/// separate calling conventions for kernel and non-kernel functions.
1245SDValue R600TargetLowering::LowerFormalArguments(
1246 SDValue Chain,
1247 CallingConv::ID CallConv,
1248 bool isVarArg,
1249 const SmallVectorImpl<ISD::InputArg> &Ins,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001250 SDLoc DL, SelectionDAG &DAG,
Tom Stellard75aadc22012-12-11 21:25:42 +00001251 SmallVectorImpl<SDValue> &InVals) const {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001252 SmallVector<CCValAssign, 16> ArgLocs;
1253 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
1254 getTargetMachine(), ArgLocs, *DAG.getContext());
1255
1256 AnalyzeFormalArguments(CCInfo, Ins);
1257
Tom Stellard1e803092013-07-23 01:48:18 +00001258 for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001259 CCValAssign &VA = ArgLocs[i];
1260 EVT VT = VA.getLocVT();
Tom Stellard78e01292013-07-23 01:47:58 +00001261
Tom Stellard75aadc22012-12-11 21:25:42 +00001262 PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +00001263 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellardacfeebf2013-07-23 01:48:05 +00001264
1265 // The first 36 bytes of the input buffer contains information about
1266 // thread group and global sizes.
Tom Stellard1e803092013-07-23 01:48:18 +00001267 SDValue Arg = DAG.getLoad(VT, DL, Chain,
Tom Stellardacfeebf2013-07-23 01:48:05 +00001268 DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
Tom Stellard1e803092013-07-23 01:48:18 +00001269 MachinePointerInfo(UndefValue::get(PtrTy)), false,
1270 false, false, 4); // 4 is the prefered alignment for
1271 // the CONSTANT memory space.
Tom Stellard75aadc22012-12-11 21:25:42 +00001272 InVals.push_back(Arg);
Tom Stellard75aadc22012-12-11 21:25:42 +00001273 }
1274 return Chain;
1275}
1276
Matt Arsenault758659232013-05-18 00:21:46 +00001277EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Tom Stellard75aadc22012-12-11 21:25:42 +00001278 if (!VT.isVector()) return MVT::i32;
1279 return VT.changeVectorElementTypeToInteger();
1280}
1281
Benjamin Kramer193960c2013-06-11 13:32:25 +00001282static SDValue
1283CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
1284 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001285 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1286 assert(RemapSwizzle.empty());
1287 SDValue NewBldVec[4] = {
1288 VectorEntry.getOperand(0),
1289 VectorEntry.getOperand(1),
1290 VectorEntry.getOperand(2),
1291 VectorEntry.getOperand(3)
1292 };
1293
1294 for (unsigned i = 0; i < 4; i++) {
1295 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1296 if (C->isZero()) {
1297 RemapSwizzle[i] = 4; // SEL_0
1298 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1299 } else if (C->isExactlyValue(1.0)) {
1300 RemapSwizzle[i] = 5; // SEL_1
1301 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1302 }
1303 }
1304
1305 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1306 continue;
1307 for (unsigned j = 0; j < i; j++) {
1308 if (NewBldVec[i] == NewBldVec[j]) {
1309 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1310 RemapSwizzle[i] = j;
1311 break;
1312 }
1313 }
1314 }
1315
1316 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
1317 VectorEntry.getValueType(), NewBldVec, 4);
1318}
1319
Benjamin Kramer193960c2013-06-11 13:32:25 +00001320static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1321 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001322 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1323 assert(RemapSwizzle.empty());
1324 SDValue NewBldVec[4] = {
1325 VectorEntry.getOperand(0),
1326 VectorEntry.getOperand(1),
1327 VectorEntry.getOperand(2),
1328 VectorEntry.getOperand(3)
1329 };
1330 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001331 for (unsigned i = 0; i < 4; i++)
1332 RemapSwizzle[i] = i;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001333
1334 for (unsigned i = 0; i < 4; i++) {
1335 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1336 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1337 ->getZExtValue();
1338 if (!isUnmovable[Idx]) {
1339 // Swap i and Idx
1340 std::swap(NewBldVec[Idx], NewBldVec[i]);
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001341 std::swap(RemapSwizzle[RemapSwizzle[Idx]], RemapSwizzle[RemapSwizzle[i]]);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001342 }
1343 isUnmovable[Idx] = true;
1344 }
1345 }
1346
1347 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
1348 VectorEntry.getValueType(), NewBldVec, 4);
1349}
1350
1351
1352SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1353SDValue Swz[4], SelectionDAG &DAG) const {
1354 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1355 // Old -> New swizzle values
1356 DenseMap<unsigned, unsigned> SwizzleRemap;
1357
1358 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1359 for (unsigned i = 0; i < 4; i++) {
1360 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1361 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1362 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1363 }
1364
1365 SwizzleRemap.clear();
1366 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1367 for (unsigned i = 0; i < 4; i++) {
1368 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1369 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1370 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1371 }
1372
1373 return BuildVector;
1374}
1375
1376
Tom Stellard75aadc22012-12-11 21:25:42 +00001377//===----------------------------------------------------------------------===//
1378// Custom DAG Optimizations
1379//===----------------------------------------------------------------------===//
1380
1381SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1382 DAGCombinerInfo &DCI) const {
1383 SelectionDAG &DAG = DCI.DAG;
1384
1385 switch (N->getOpcode()) {
1386 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1387 case ISD::FP_ROUND: {
1388 SDValue Arg = N->getOperand(0);
1389 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001390 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001391 Arg.getOperand(0));
1392 }
1393 break;
1394 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001395
1396 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1397 // (i32 select_cc f32, f32, -1, 0 cc)
1398 //
1399 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1400 // this to one of the SET*_DX10 instructions.
1401 case ISD::FP_TO_SINT: {
1402 SDValue FNeg = N->getOperand(0);
1403 if (FNeg.getOpcode() != ISD::FNEG) {
1404 return SDValue();
1405 }
1406 SDValue SelectCC = FNeg.getOperand(0);
1407 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1408 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1409 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1410 !isHWTrueValue(SelectCC.getOperand(2)) ||
1411 !isHWFalseValue(SelectCC.getOperand(3))) {
1412 return SDValue();
1413 }
1414
Andrew Trickef9de2a2013-05-25 02:42:55 +00001415 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001416 SelectCC.getOperand(0), // LHS
1417 SelectCC.getOperand(1), // RHS
1418 DAG.getConstant(-1, MVT::i32), // True
1419 DAG.getConstant(0, MVT::i32), // Flase
1420 SelectCC.getOperand(4)); // CC
1421
1422 break;
1423 }
Quentin Colombete2e05482013-07-30 00:27:16 +00001424
1425 // insert_vector_elt (build_vector elt0, …, eltN), NewEltIdx, idx
1426 // => build_vector elt0, …, NewEltIdx, …, eltN
1427 case ISD::INSERT_VECTOR_ELT: {
1428 SDValue InVec = N->getOperand(0);
1429 SDValue InVal = N->getOperand(1);
1430 SDValue EltNo = N->getOperand(2);
1431 SDLoc dl(N);
1432
1433 // If the inserted element is an UNDEF, just use the input vector.
1434 if (InVal.getOpcode() == ISD::UNDEF)
1435 return InVec;
1436
1437 EVT VT = InVec.getValueType();
1438
1439 // If we can't generate a legal BUILD_VECTOR, exit
1440 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1441 return SDValue();
1442
1443 // Check that we know which element is being inserted
1444 if (!isa<ConstantSDNode>(EltNo))
1445 return SDValue();
1446 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1447
1448 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1449 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1450 // vector elements.
1451 SmallVector<SDValue, 8> Ops;
1452 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1453 Ops.append(InVec.getNode()->op_begin(),
1454 InVec.getNode()->op_end());
1455 } else if (InVec.getOpcode() == ISD::UNDEF) {
1456 unsigned NElts = VT.getVectorNumElements();
1457 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1458 } else {
1459 return SDValue();
1460 }
1461
1462 // Insert the element
1463 if (Elt < Ops.size()) {
1464 // All the operands of BUILD_VECTOR must have the same type;
1465 // we enforce that here.
1466 EVT OpVT = Ops[0].getValueType();
1467 if (InVal.getValueType() != OpVT)
1468 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1469 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
1470 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
1471 Ops[Elt] = InVal;
1472 }
1473
1474 // Return the new vector
1475 return DAG.getNode(ISD::BUILD_VECTOR, dl,
1476 VT, &Ops[0], Ops.size());
1477 }
1478
Tom Stellard365366f2013-01-23 02:09:06 +00001479 // Extract_vec (Build_vector) generated by custom lowering
1480 // also needs to be customly combined
1481 case ISD::EXTRACT_VECTOR_ELT: {
1482 SDValue Arg = N->getOperand(0);
1483 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1484 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1485 unsigned Element = Const->getZExtValue();
1486 return Arg->getOperand(Element);
1487 }
1488 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001489 if (Arg.getOpcode() == ISD::BITCAST &&
1490 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1491 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1492 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001493 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00001494 Arg->getOperand(0).getOperand(Element));
1495 }
1496 }
Tom Stellard365366f2013-01-23 02:09:06 +00001497 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001498
1499 case ISD::SELECT_CC: {
1500 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1501 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001502 //
1503 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1504 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001505 SDValue LHS = N->getOperand(0);
1506 if (LHS.getOpcode() != ISD::SELECT_CC) {
1507 return SDValue();
1508 }
1509
1510 SDValue RHS = N->getOperand(1);
1511 SDValue True = N->getOperand(2);
1512 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001513 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001514
1515 if (LHS.getOperand(2).getNode() != True.getNode() ||
1516 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00001517 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00001518 return SDValue();
1519 }
1520
Tom Stellard5e524892013-03-08 15:37:11 +00001521 switch (NCC) {
1522 default: return SDValue();
1523 case ISD::SETNE: return LHS;
1524 case ISD::SETEQ: {
1525 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1526 LHSCC = ISD::getSetCCInverse(LHSCC,
1527 LHS.getOperand(0).getValueType().isInteger());
Andrew Trickef9de2a2013-05-25 02:42:55 +00001528 return DAG.getSelectCC(SDLoc(N),
Tom Stellard5e524892013-03-08 15:37:11 +00001529 LHS.getOperand(0),
1530 LHS.getOperand(1),
1531 LHS.getOperand(2),
1532 LHS.getOperand(3),
1533 LHSCC);
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001534 }
Tom Stellard5e524892013-03-08 15:37:11 +00001535 }
1536 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001537 case AMDGPUISD::EXPORT: {
1538 SDValue Arg = N->getOperand(1);
1539 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1540 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001541
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001542 SDValue NewArgs[8] = {
1543 N->getOperand(0), // Chain
1544 SDValue(),
1545 N->getOperand(2), // ArrayBase
1546 N->getOperand(3), // Type
1547 N->getOperand(4), // SWZ_X
1548 N->getOperand(5), // SWZ_Y
1549 N->getOperand(6), // SWZ_Z
1550 N->getOperand(7) // SWZ_W
1551 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00001552 SDLoc DL(N);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001553 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001554 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
Tom Stellarde06163a2013-02-07 14:02:35 +00001555 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001556 case AMDGPUISD::TEXTURE_FETCH: {
1557 SDValue Arg = N->getOperand(1);
1558 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1559 break;
1560
1561 SDValue NewArgs[19] = {
1562 N->getOperand(0),
1563 N->getOperand(1),
1564 N->getOperand(2),
1565 N->getOperand(3),
1566 N->getOperand(4),
1567 N->getOperand(5),
1568 N->getOperand(6),
1569 N->getOperand(7),
1570 N->getOperand(8),
1571 N->getOperand(9),
1572 N->getOperand(10),
1573 N->getOperand(11),
1574 N->getOperand(12),
1575 N->getOperand(13),
1576 N->getOperand(14),
1577 N->getOperand(15),
1578 N->getOperand(16),
1579 N->getOperand(17),
1580 N->getOperand(18),
1581 };
1582 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
1583 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
1584 NewArgs, 19);
1585 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001586 }
1587 return SDValue();
1588}