//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation ----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Function.h"

using namespace llvm;

R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
  addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);

  computeRegisterProperties();

  setOperationAction(ISD::FADD, MVT::v4f32, Expand);
  setOperationAction(ISD::FADD, MVT::v2f32, Expand);
  setOperationAction(ISD::FMUL, MVT::v4f32, Expand);
  setOperationAction(ISD::FMUL, MVT::v2f32, Expand);
  setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
  setOperationAction(ISD::FDIV, MVT::v2f32, Expand);
  setOperationAction(ISD::FSUB, MVT::v4f32, Expand);
  setOperationAction(ISD::FSUB, MVT::v2f32, Expand);

  setOperationAction(ISD::FCOS, MVT::f32, Custom);
  setOperationAction(ISD::FSIN, MVT::f32, Custom);

  setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
  setOperationAction(ISD::SETCC, MVT::v2i32, Expand);

  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);

  setOperationAction(ISD::SELECT, MVT::i32, Custom);
  setOperationAction(ISD::SELECT, MVT::f32, Custom);

  // Legalize loads and stores to the private address space.
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
  setOperationAction(ISD::STORE, MVT::i8, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);
  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
  setTruncStoreAction(MVT::i32, MVT::i16, Custom);

  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);

  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::FP_TO_SINT);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::SELECT_CC);
  setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);

  setBooleanContents(ZeroOrNegativeOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
  setSchedulingPreference(Sched::Source);
}
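
// A quick orientation note: "Expand" above asks the common legalizer to
// rewrite a node in terms of simpler operations (the vector FADD/FMUL/
// FDIV/FSUB and SETCC cases get scalarized), while "Custom" routes the
// node to LowerOperation()/ReplaceNodeResults() further down in this file.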

MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());

  switch (MI->getOpcode()) {
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::LDS_READ_RET: {
    MachineInstrBuilder NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                                        TII->get(MI->getOpcode()),
                                        AMDGPU::OQAP);
    for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
      NewMI.addOperand(MI->getOperand(i));
    }
    TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV,
                                 MI->getOperand(0).getReg(),
                                 AMDGPU::OQAP);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
        MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
                .addOperand(MI->getOperand(1))
                .addImm(OPCODE_IS_NOT_ZERO)
                .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
                .addOperand(MI->getOperand(1))
                .addImm(OPCODE_IS_NOT_ZERO_INT)
                .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // The instruction is left unmodified if it is not the last one of its type.
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = llvm::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz) ? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  MI->eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::FCOS:
  case ISD::FSIN: return LowerTrig(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::SELECT: return LowerSELECT(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: return LowerLOAD(Op, DAG);
  case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MFI->LiveOuts.push_back(Reg);
      return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
    }
    case AMDGPUIntrinsic::R600_store_swizzle: {
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, MVT::i32), // SWZ_X
        DAG.getConstant(1, MVT::i32), // SWZ_Y
        DAG.getConstant(2, MVT::i32), // SWZ_Z
        DAG.getConstant(3, MVT::i32)  // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(),
                         Args, 8);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch(IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      MRI.addLiveIn(Reg);
      return DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), Reg, VT);
    }

    case AMDGPUIntrinsic::R600_interp_input: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
      MachineSDNode *interp;
      if (ijb < 0) {
        const MachineFunction &MF = DAG.getMachineFunction();
        const R600InstrInfo *TII =
            static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
        interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
            MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
        return DAG.getTargetExtractSubreg(
            TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
            DL, MVT::f32, SDValue(interp, 0));
      }

      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
      unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
      MRI.addLiveIn(RegisterI);
      MRI.addLiveIn(RegisterJ);
      SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
      SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);

      if (slot % 4 < 2)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            RegisterJNode, RegisterINode);
      return SDValue(interp, slot % 2);
    }
    case AMDGPUIntrinsic::R600_tex:
    case AMDGPUIntrinsic::R600_texc:
    case AMDGPUIntrinsic::R600_txl:
    case AMDGPUIntrinsic::R600_txlc:
    case AMDGPUIntrinsic::R600_txb:
    case AMDGPUIntrinsic::R600_txbc:
    case AMDGPUIntrinsic::R600_txf:
    case AMDGPUIntrinsic::R600_txq:
    case AMDGPUIntrinsic::R600_ddx:
    case AMDGPUIntrinsic::R600_ddy: {
      unsigned TextureOp;
      switch (IntrinsicID) {
      case AMDGPUIntrinsic::R600_tex:
        TextureOp = 0;
        break;
      case AMDGPUIntrinsic::R600_texc:
        TextureOp = 1;
        break;
      case AMDGPUIntrinsic::R600_txl:
        TextureOp = 2;
        break;
      case AMDGPUIntrinsic::R600_txlc:
        TextureOp = 3;
        break;
      case AMDGPUIntrinsic::R600_txb:
        TextureOp = 4;
        break;
      case AMDGPUIntrinsic::R600_txbc:
        TextureOp = 5;
        break;
      case AMDGPUIntrinsic::R600_txf:
        TextureOp = 6;
        break;
      case AMDGPUIntrinsic::R600_txq:
        TextureOp = 7;
        break;
      case AMDGPUIntrinsic::R600_ddx:
        TextureOp = 8;
        break;
      case AMDGPUIntrinsic::R600_ddy:
        TextureOp = 9;
        break;
      default:
        llvm_unreachable("Unknown Texture Operation");
      }

      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
    }
    case AMDGPUIntrinsic::AMDGPU_dp4: {
      SDValue Args[8] = {
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
                    DAG.getConstant(0, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
                    DAG.getConstant(0, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
                    DAG.getConstant(1, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
                    DAG.getConstant(1, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
                    DAG.getConstant(2, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
                    DAG.getConstant(2, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
                    DAG.getConstant(3, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
                    DAG.getConstant(3, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8);
    }

    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default: return;
  case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
    return;
  case ISD::LOAD: {
    SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
    Results.push_back(SDValue(Node, 0));
    Results.push_back(SDValue(Node, 1));
    // XXX: LLVM seems not to replace the Chain value inside
    // CustomWidenLowerNode.
    DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
    return;
  }
  case ISD::STORE:
    SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
    Results.push_back(SDValue(Node, 0));
    return;
  }
}

SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
  // On hw >= R700, the COS/SIN input must be between -1.0 and 1.0.
  // Thus we lower them to TRIG(FRACT(x / (2 * Pi) + 0.5) - 0.5).
  EVT VT = Op.getValueType();
  SDValue Arg = Op.getOperand(0);
  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
      DAG.getNode(ISD::FADD, SDLoc(Op), VT,
          DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
              DAG.getConstantFP(0.15915494309, MVT::f32)),
          DAG.getConstantFP(0.5, MVT::f32)));
  unsigned TrigNode;
  switch (Op.getOpcode()) {
  case ISD::FCOS:
    TrigNode = AMDGPUISD::COS_HW;
    break;
  case ISD::FSIN:
    TrigNode = AMDGPUISD::SIN_HW;
    break;
  default:
    llvm_unreachable("Wrong trig opcode");
  }
  SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
      DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
          DAG.getConstantFP(-0.5, MVT::f32)));
  if (Gen >= AMDGPUSubtarget::R700)
    return TrigVal;
  // On R600 hw, COS/SIN input must be between -Pi and Pi.
  return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
      DAG.getConstantFP(3.14159265359, MVT::f32));
}
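
// A worked example of the reduction above (illustrative comment only):
// 0.15915494309 is 1 / (2 * Pi), so for FSIN of x = Pi the node computes
// FRACT(Pi * (1 / (2 * Pi)) + 0.5) - 0.5 = FRACT(1.0) - 0.5 = -0.5, i.e.
// the argument is mapped into the [-0.5, 0.5] period expected by the *_HW
// trig nodes. On pre-R700 parts the lowered value is additionally
// multiplied by Pi (3.14159265359) before being returned, per the
// [-Pi, Pi] input convention noted above.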

SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
  return DAG.getNode(
      ISD::SETCC,
      SDLoc(Op),
      MVT::i1,
      Op, DAG.getConstantFP(0.0f, MVT::f32),
      DAG.getCondCode(ISD::SETNE)
      );
}

SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   SDLoc DL,
                                                   unsigned DwordOffset) const {
  unsigned ByteOffset = DwordOffset * 4;
  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                           AMDGPUAS::CONSTANT_BUFFER_0);

  // We shouldn't be using an offset wider than 16 bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)),
                     false, false, false, 0);
}
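
// For reference: the dword offsets passed in from LowerOperation() map the
// implicit parameters laid out at the start of the input buffer --
// 0..2 -> ngroups.{x,y,z}, 3..5 -> global_size.{x,y,z},
// 6..8 -> local_size.{x,y,z} -- so e.g. Intrinsic::r600_read_global_size_y
// becomes an i32 load from byte offset 4 * 4 = 16 of CONSTANT_BUFFER_0.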

SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {

  MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL =
      static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());

  FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
  assert(FIN);

  unsigned FrameIndex = FIN->getIndex();
  unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
  return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
}
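
// The constant built above is still a byte address into the private (stack)
// space: LowerLOAD()/LowerSTORE() later feed such pointers through
// stackPtrToRegIndex(), whose shift divides by the same 4 * StackWidth
// factor, recovering a register index.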

bool R600TargetLowering::isZero(SDValue Op) const {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
    return Cst->isNullValue();
  } else if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CstFP->isZero();
  } else {
    return false;
  }
}

SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type.
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1,   0,    cc_any
  // select_cc f32, f32, 1.0f, 0.0f, cc_any
  // select_cc i32, i32, -1,   0,    cc_any
  //

  // Move hardware True/False values to the correct operand.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    std::swap(False, True);
    CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_any
  // select_cc f32, 0.0, i32, i32, cc_any
  // select_cc i32, 0,   f32, f32, cc_any
  // select_cc i32, 0,   i32, i32, cc_any
  //
  if (isZero(LHS) || isZero(RHS)) {
    SDValue Cond = (isZero(LHS) ? RHS : LHS);
    SDValue Zero = (isZero(LHS) ? LHS : RHS);
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False.
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }
    if (isZero(LHS)) {
      CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
    }

    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
    case ISD::SETULE:
    case ISD::SETULT:
    case ISD::SETOLE:
    case ISD::SETOLT:
    case ISD::SETLE:
    case ISD::SETLT:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // Possible Min/Max pattern.
  SDValue MinMax = LowerMinMax(Op, DAG);
  if (MinMax.getNode()) {
    return MinMax;
  }

  // If we make it this far, it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  } else {
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}

SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  return DAG.getNode(ISD::SELECT_CC,
      SDLoc(Op),
      Op.getValueType(),
      Op.getOperand(0),
      DAG.getConstant(0, MVT::i32),
      Op.getOperand(1),
      Op.getOperand(2),
      DAG.getCondCode(ISD::SETNE));
}

/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
/// convert these pointers to a register index. Each register holds
/// 16 bytes (4 x 32-bit sub-registers), but we need to take into account the
/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
/// for indirect addressing.
SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
                                               unsigned StackWidth,
                                               SelectionDAG &DAG) const {
  unsigned SRLPad;
  switch (StackWidth) {
  case 1:
    SRLPad = 2;
    break;
  case 2:
    SRLPad = 3;
    break;
  case 4:
    SRLPad = 4;
    break;
  default: llvm_unreachable("Invalid stack width");
  }

  return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
                     DAG.getConstant(SRLPad, MVT::i32));
}
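
// Sketch of the mapping (illustrative): with StackWidth == 1 each stack slot
// occupies one 4-byte channel of a register, so a byte address is shifted
// right by 2 (divided by 4) to obtain the register index; with StackWidth 2
// or 4, two or four channels per register are live, hence the shifts by
// 3 and 4 (divisions by 8 and 16).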

void R600TargetLowering::getStackAddress(unsigned StackWidth,
                                         unsigned ElemIdx,
                                         unsigned &Channel,
                                         unsigned &PtrIncr) const {
  switch (StackWidth) {
  default:
  case 1:
    Channel = 0;
    if (ElemIdx > 0) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 2:
    Channel = ElemIdx % 2;
    if (ElemIdx == 2) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 4:
    Channel = ElemIdx;
    PtrIncr = 0;
    break;
  }
}
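
// For example (StackWidth == 2; callers accumulate PtrIncr into the pointer
// as they iterate): ElemIdx 0 -> (Channel 0, PtrIncr 0), 1 -> (1, 0),
// 2 -> (0, 1), 3 -> (1, 0); i.e. a four-element value straddles two
// consecutive register indices, two channels per register.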

SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Ptr = Op.getOperand(2);

  if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
    if (StoreNode->isTruncatingStore()) {
      EVT VT = Value.getValueType();
      assert(VT == MVT::i32);
      EVT MemVT = StoreNode->getMemoryVT();
      SDValue MaskConstant;
      if (MemVT == MVT::i8) {
        MaskConstant = DAG.getConstant(0xFF, MVT::i32);
      } else {
        assert(MemVT == MVT::i16);
        MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
      }
      SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
                                      DAG.getConstant(2, MVT::i32));
      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
                                      DAG.getConstant(0x00000003, VT));
      SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
      SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
                                  DAG.getConstant(3, VT));
      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
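      // Worked example (comment only): for a truncating i8 store to byte
      // address 5, DWordAddr = 5 >> 2 = 1, ByteIndex = 5 & 3 = 1, so
      // Shift = 8, Mask = 0xFF << 8, and ShiftedValue places the byte in
      // bits [15:8] of the dword that the MSKOR node below read-modify-writes.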
      // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
      // vector instead.
      SDValue Src[4] = {
        ShiftedValue,
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(0, MVT::i32),
        Mask
      };
      SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src, 4);
      SDValue Args[3] = { Chain, Input, DWordAddr };
      return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
                                     Op->getVTList(), Args, 3, MemVT,
                                     StoreNode->getMemOperand());
    } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
               Value.getValueType().bitsGE(MVT::i32)) {
      // Convert pointer from byte address to dword address.
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
                        DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
                                    Ptr, DAG.getConstant(2, MVT::i32)));

      if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
        assert(!"Truncated and indexed stores not supported yet");
      } else {
        Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
      }
      return Chain;
    }
  }

  EVT ValueVT = Value.getValueType();

  if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing.

  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (ValueVT.isVector()) {
    unsigned NumElemVT = ValueVT.getVectorNumElements();
    EVT ElemVT = ValueVT.getVectorElementType();
    SDValue Stores[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in store");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
                                 Value, DAG.getConstant(i, MVT::i32));

      Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
                              Chain, Elem, Ptr,
                              DAG.getTargetConstant(Channel, MVT::i32));
    }
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
  } else {
    if (ValueVT == MVT::i8) {
      Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
    }
    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
                        DAG.getTargetConstant(0, MVT::i32)); // Channel
  }

  return Chain;
}

// Returns 512 + (kc_bank << 12).
static int
ConstantAddressBlock(unsigned AddressSpace) {
  switch (AddressSpace) {
  case AMDGPUAS::CONSTANT_BUFFER_0:
    return 512;
  case AMDGPUAS::CONSTANT_BUFFER_1:
    return 512 + 4096;
  case AMDGPUAS::CONSTANT_BUFFER_2:
    return 512 + 4096 * 2;
  case AMDGPUAS::CONSTANT_BUFFER_3:
    return 512 + 4096 * 3;
  case AMDGPUAS::CONSTANT_BUFFER_4:
    return 512 + 4096 * 4;
  case AMDGPUAS::CONSTANT_BUFFER_5:
    return 512 + 4096 * 5;
  case AMDGPUAS::CONSTANT_BUFFER_6:
    return 512 + 4096 * 6;
  case AMDGPUAS::CONSTANT_BUFFER_7:
    return 512 + 4096 * 7;
  case AMDGPUAS::CONSTANT_BUFFER_8:
    return 512 + 4096 * 8;
  case AMDGPUAS::CONSTANT_BUFFER_9:
    return 512 + 4096 * 9;
  case AMDGPUAS::CONSTANT_BUFFER_10:
    return 512 + 4096 * 10;
  case AMDGPUAS::CONSTANT_BUFFER_11:
    return 512 + 4096 * 11;
  case AMDGPUAS::CONSTANT_BUFFER_12:
    return 512 + 4096 * 12;
  case AMDGPUAS::CONSTANT_BUFFER_13:
    return 512 + 4096 * 13;
  case AMDGPUAS::CONSTANT_BUFFER_14:
    return 512 + 4096 * 14;
  case AMDGPUAS::CONSTANT_BUFFER_15:
    return 512 + 4096 * 15;
  default:
    return -1;
  }
}

SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1) {
    SDValue Result;
    if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
        dyn_cast<Constant>(LoadNode->getSrcValue()) ||
        dyn_cast<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want the Const position encoded with the following formula:
        //   (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is the Ptr computed by LLVM using an alignment of 16.
        // Thus we add ((512 + (kc_bank << 12)) * 4 + chan) * 4 here and
        // then divide by 4 at the ISel step.
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT, Slots, NumElements);
    } else {
1186 // non constant ptr cant be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  // For most operations, returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so
  // we need to manually expand loads that may be legal in some address spaces
  // and illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported
  // for compute shaders, since the data is sign-extended when it is uploaded
  // to the buffer. However, SEXT loads from other address spaces are not
  // supported, so we need to expand them here.
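  // For example (illustrative): a sext i8 load of value V into an i32
  // becomes (SRA (SHL (any-extending load of V), 24), 24), with the shift
  // amount computed below as 32 - 8 = 24.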
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    SDValue ShiftAmount =
        DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                     LoadNode->getPointerInfo(), MemVT,
                                     LoadNode->isVolatile(),
                                     LoadNode->isNonTemporal(),
                                     LoadNode->getAlignment());
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);

    SDValue MergedValues[2] = { Sra, Chain };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing.
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, MVT::i32),
                             Op.getOperand(2));
    }
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2];
  Ops[0] = LoweredLoad;
  Ops[1] = Chain;

  return DAG.getMergeValues(Ops, 2, DL);
}

/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      SDLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());

  AnalyzeFormalArguments(CCInfo, Ins);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    EVT VT = VA.getLocVT();

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // The first 36 bytes of the input buffer contain information about
    // thread group and global sizes.
    SDValue Arg = DAG.getLoad(VT, DL, Chain,
                         DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
                         MachinePointerInfo(UndefValue::get(PtrTy)), false,
                         false, false, 4); // 4 is the preferred alignment for
                                           // the CONSTANT memory space.
    InVals.push_back(Arg);
  }
  return Chain;
}
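
// Note: the 36-byte offset matches the nine implicit dwords read by
// LowerImplicitParameter() above (ngroups, global_size and local_size,
// 9 x 4 bytes), so explicit kernel arguments start right after them in
// CONSTANT_BUFFER_0.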

EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
  if (!VT.isVector()) return MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}

static SDValue
CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
                        DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };

  for (unsigned i = 0; i < 4; i++) {
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      continue;
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec, 4);
}
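
// For instance (comment only): BUILD_VECTOR(x, 0.0, x, 1.0) is compacted to
// BUILD_VECTOR(x, undef, undef, undef) with the remap
// {1 -> 4 (SEL_0), 2 -> 0, 3 -> 5 (SEL_1)}, letting the swizzle select the
// constant 0/1 lanes and the duplicated operand for free.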
1356
static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
                                DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };
  bool isUnmovable[4] = { false, false, false, false };
  for (unsigned i = 0; i < 4; i++)
    RemapSwizzle[i] = i;

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
                         ->getZExtValue();
      if (!isUnmovable[Idx]) {
        // Swap i and Idx
        std::swap(NewBldVec[Idx], NewBldVec[i]);
        std::swap(RemapSwizzle[RemapSwizzle[Idx]], RemapSwizzle[RemapSwizzle[i]]);
      }
      isUnmovable[Idx] = true;
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec, 4);
}

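// Run both rewrites above over a BUILD_VECTOR and keep the four
// swizzle-select operands in Swz consistent with the remapped lanes: first
// constants and duplicates are folded into SEL_0/SEL_1 and shared lanes,
// then the remaining lanes are moved back toward their source positions.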
SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
                                            SDValue Swz[4],
                                            SelectionDAG &DAG) const {
  assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
  // Old -> New swizzle values
  DenseMap<unsigned, unsigned> SwizzleRemap;

  BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
  for (unsigned i = 0; i < 4; i++) {
    unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
    if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
      Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
  }

  SwizzleRemap.clear();
  BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
  for (unsigned i = 0; i < 4; i++) {
    unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
    if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
      Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
  }

  return BuildVector;
}

//===----------------------------------------------------------------------===//
// Custom DAG Optimizations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

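  // Each case below either returns a replacement node or breaks out of the
  // switch, in which case the null SDValue at the end leaves N unchanged.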
  switch (N->getOpcode()) {
  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
  case ISD::FP_ROUND: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
      return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
                         Arg.getOperand(0));
    }
    break;
  }

  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
  // (i32 select_cc f32, f32, -1, 0 cc)
  //
  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
  // this to one of the SET*_DX10 instructions.
  case ISD::FP_TO_SINT: {
    SDValue FNeg = N->getOperand(0);
    if (FNeg.getOpcode() != ISD::FNEG) {
      return SDValue();
    }
    SDValue SelectCC = FNeg.getOperand(0);
    if (SelectCC.getOpcode() != ISD::SELECT_CC ||
        SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
        SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
        !isHWTrueValue(SelectCC.getOperand(2)) ||
        !isHWFalseValue(SelectCC.getOperand(3))) {
      return SDValue();
    }

    return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
                       SelectCC.getOperand(0), // LHS
                       SelectCC.getOperand(1), // RHS
                       DAG.getConstant(-1, MVT::i32), // True
                       DAG.getConstant(0, MVT::i32), // False
                       SelectCC.getOperand(4)); // CC
  }

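  // Note: this combine largely mirrors the target-independent
  // INSERT_VECTOR_ELT -> BUILD_VECTOR fold in DAGCombiner, repeated here
  // (presumably) so it still fires on nodes produced by R600's custom
  // vector lowering, where BUILD_VECTOR is a legal operation.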
  // insert_vector_elt (build_vector elt0, …, eltN), NewEltIdx, idx
  // => build_vector elt0, …, NewEltIdx, …, eltN
  case ISD::INSERT_VECTOR_ELT: {
    SDValue InVec = N->getOperand(0);
    SDValue InVal = N->getOperand(1);
    SDValue EltNo = N->getOperand(2);
    SDLoc dl(N);

    // If the inserted element is an UNDEF, just use the input vector.
    if (InVal.getOpcode() == ISD::UNDEF)
      return InVec;

    EVT VT = InVec.getValueType();

    // If we can't generate a legal BUILD_VECTOR, exit
    if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
      return SDValue();

    // Check that we know which element is being inserted
    if (!isa<ConstantSDNode>(EltNo))
      return SDValue();
    unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

    // Check that the operand is a BUILD_VECTOR (or UNDEF, which can
    // essentially be converted to a BUILD_VECTOR). Fill in the Ops vector
    // with the vector elements.
    SmallVector<SDValue, 8> Ops;
    if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
      Ops.append(InVec.getNode()->op_begin(),
                 InVec.getNode()->op_end());
    } else if (InVec.getOpcode() == ISD::UNDEF) {
      unsigned NElts = VT.getVectorNumElements();
      Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
    } else {
      return SDValue();
    }

    // Insert the element
    if (Elt < Ops.size()) {
      // All the operands of BUILD_VECTOR must have the same type;
      // we enforce that here.
      EVT OpVT = Ops[0].getValueType();
      if (InVal.getValueType() != OpVT)
        InVal = OpVT.bitsGT(InVal.getValueType()) ?
                  DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
                  DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
      Ops[Elt] = InVal;
    }

    // Return the new vector
    return DAG.getNode(ISD::BUILD_VECTOR, dl,
                       VT, &Ops[0], Ops.size());
  }

  // An EXTRACT_VECTOR_ELT of a BUILD_VECTOR generated by custom lowering
  // also needs a custom combine here.
  case ISD::EXTRACT_VECTOR_ELT: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return Arg->getOperand(Element);
      }
    }
    if (Arg.getOpcode() == ISD::BITCAST &&
        Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
                           Arg->getOperand(0).getOperand(Element));
      }
    }
    break;
  }

  case ISD::SELECT_CC: {
    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
    // selectcc x, y, a, b, inv(cc)
    //
    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
    // selectcc x, y, a, b, cc
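    //
    // In both folds the outer node tests whether the inner selectcc produced
    // b: with setne it selects a exactly when the inner select did, so the
    // inner node can be reused directly; with seteq the roles of a and b
    // flip, which is equivalent to inverting the inner condition.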
    SDValue LHS = N->getOperand(0);
    if (LHS.getOpcode() != ISD::SELECT_CC) {
      return SDValue();
    }

    SDValue RHS = N->getOperand(1);
    SDValue True = N->getOperand(2);
    SDValue False = N->getOperand(3);
    ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();

    if (LHS.getOperand(2).getNode() != True.getNode() ||
        LHS.getOperand(3).getNode() != False.getNode() ||
        RHS.getNode() != False.getNode()) {
      return SDValue();
    }

    switch (NCC) {
    default: return SDValue();
    case ISD::SETNE: return LHS;
    case ISD::SETEQ: {
      ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
      LHSCC = ISD::getSetCCInverse(LHSCC,
                                   LHS.getOperand(0).getValueType().isInteger());
      return DAG.getSelectCC(SDLoc(N),
                             LHS.getOperand(0),
                             LHS.getOperand(1),
                             LHS.getOperand(2),
                             LHS.getOperand(3),
                             LHSCC);
    }
    }
  }
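  // Operands 4-7 of an EXPORT node are the per-lane swizzle selects; shrink
  // the exported BUILD_VECTOR and rewrite those selects to match.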
  case AMDGPUISD::EXPORT: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    SDValue NewArgs[8] = {
      N->getOperand(0), // Chain
      SDValue(),
      N->getOperand(2), // ArrayBase
      N->getOperand(3), // Type
      N->getOperand(4), // SWZ_X
      N->getOperand(5), // SWZ_Y
      N->getOperand(6), // SWZ_Z
      N->getOperand(7)  // SWZ_W
    };
    SDLoc DL(N);
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
    return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
  }
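  // TEXTURE_FETCH carries its four coordinate swizzle selects in operands
  // 2-5 (the remaining operands are, presumably, the texture opcode, offsets,
  // and resource/sampler parameters); optimize them the same way.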
  case AMDGPUISD::TEXTURE_FETCH: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    SDValue NewArgs[19] = {
      N->getOperand(0),
      N->getOperand(1),
      N->getOperand(2),
      N->getOperand(3),
      N->getOperand(4),
      N->getOperand(5),
      N->getOperand(6),
      N->getOperand(7),
      N->getOperand(8),
      N->getOperand(9),
      N->getOperand(10),
      N->getOperand(11),
      N->getOperand(12),
      N->getOperand(13),
      N->getOperand(14),
      N->getOperand(15),
      N->getOperand(16),
      N->getOperand(17),
      N->getOperand(18),
    };
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
    return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
                       NewArgs, 19);
  }
  }
  return SDValue();
}