//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Function.h"

using namespace llvm;

R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
  addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);

  computeRegisterProperties();

  setOperationAction(ISD::FADD, MVT::v4f32, Expand);
  setOperationAction(ISD::FADD, MVT::v2f32, Expand);
  setOperationAction(ISD::FMUL, MVT::v4f32, Expand);
  setOperationAction(ISD::FMUL, MVT::v2f32, Expand);
  setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
  setOperationAction(ISD::FDIV, MVT::v2f32, Expand);
  setOperationAction(ISD::FSUB, MVT::v4f32, Expand);
  setOperationAction(ISD::FSUB, MVT::v2f32, Expand);

  setOperationAction(ISD::FCOS, MVT::f32, Custom);
  setOperationAction(ISD::FSIN, MVT::f32, Custom);

  setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
  setOperationAction(ISD::SETCC, MVT::v2i32, Expand);

  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);

  setOperationAction(ISD::SELECT, MVT::i32, Custom);
  setOperationAction(ISD::SELECT, MVT::f32, Custom);

  // Legalize loads and stores to the private address space.
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
  setOperationAction(ISD::STORE, MVT::i8, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);
  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
  setTruncStoreAction(MVT::i32, MVT::i16, Custom);

  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);

  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::FP_TO_SINT);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::SELECT_CC);
  setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);

  setBooleanContents(ZeroOrNegativeOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
  setSchedulingPreference(Sched::Source);
}

MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());

  switch (MI->getOpcode()) {
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

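  // An LDS read on R600 produces its result in the OQAP queue register.
  // The case below rebuilds the read with OQAP as its explicit def and then
  // immediately copies OQAP into the original destination vreg.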
  case AMDGPU::LDS_READ_RET: {
    MachineInstrBuilder NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                                        TII->get(MI->getOpcode()),
                                        AMDGPU::OQAP);
    for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
      NewMI.addOperand(MI->getOperand(i));
    }
    TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV,
                                 MI->getOperand(0).getReg(),
                                 AMDGPU::OQAP);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
        MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
        .addOperand(MI->getOperand(3))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
        .addOperand(MI->getOperand(2))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW)
        .addReg(T0, RegState::Implicit)
        .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
        .addOperand(MI->getOperand(3))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
        .addOperand(MI->getOperand(2))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW)
        .addReg(T0, RegState::Implicit)
        .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
        .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .addOperand(MI->getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO_INT)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .addOperand(MI->getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if it's not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = llvm::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz) ? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addOperand(MI->getOperand(2))
        .addOperand(MI->getOperand(3))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5))
        .addOperand(MI->getOperand(6))
        .addImm(CfInst)
        .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  MI->eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::FCOS:
  case ISD::FSIN: return LowerTrig(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::SELECT: return LowerSELECT(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: return LowerLOAD(Op, DAG);
  case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MFI->LiveOuts.push_back(Reg);
      return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
    }
    case AMDGPUIntrinsic::R600_store_swizzle: {
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, MVT::i32), // SWZ_X
        DAG.getConstant(1, MVT::i32), // SWZ_Y
        DAG.getConstant(2, MVT::i32), // SWZ_Z
        DAG.getConstant(3, MVT::i32)  // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(),
                         Args, 8);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch(IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      MRI.addLiveIn(Reg);
      return DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), Reg, VT);
    }

    case AMDGPUIntrinsic::R600_interp_input: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
      MachineSDNode *interp;
      if (ijb < 0) {
        const MachineFunction &MF = DAG.getMachineFunction();
        const R600InstrInfo *TII =
            static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
        interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
            MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
        return DAG.getTargetExtractSubreg(
            TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
            DL, MVT::f32, SDValue(interp, 0));
      }

      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
      unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
      MRI.addLiveIn(RegisterI);
      MRI.addLiveIn(RegisterJ);
      SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
      SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);

      if (slot % 4 < 2)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            RegisterJNode, RegisterINode);
      return SDValue(interp, slot % 2);
    }
    case AMDGPUIntrinsic::R600_tex:
    case AMDGPUIntrinsic::R600_texc:
    case AMDGPUIntrinsic::R600_txl:
    case AMDGPUIntrinsic::R600_txlc:
    case AMDGPUIntrinsic::R600_txb:
    case AMDGPUIntrinsic::R600_txbc:
    case AMDGPUIntrinsic::R600_txf:
    case AMDGPUIntrinsic::R600_txq:
    case AMDGPUIntrinsic::R600_ddx:
    case AMDGPUIntrinsic::R600_ddy: {
      unsigned TextureOp;
      switch (IntrinsicID) {
      case AMDGPUIntrinsic::R600_tex:
        TextureOp = 0;
        break;
      case AMDGPUIntrinsic::R600_texc:
        TextureOp = 1;
        break;
      case AMDGPUIntrinsic::R600_txl:
        TextureOp = 2;
        break;
      case AMDGPUIntrinsic::R600_txlc:
        TextureOp = 3;
        break;
      case AMDGPUIntrinsic::R600_txb:
        TextureOp = 4;
        break;
      case AMDGPUIntrinsic::R600_txbc:
        TextureOp = 5;
        break;
      case AMDGPUIntrinsic::R600_txf:
        TextureOp = 6;
        break;
      case AMDGPUIntrinsic::R600_txq:
        TextureOp = 7;
        break;
      case AMDGPUIntrinsic::R600_ddx:
        TextureOp = 8;
        break;
      case AMDGPUIntrinsic::R600_ddy:
        TextureOp = 9;
        break;
      default:
        llvm_unreachable("Unknown Texture Operation");
      }

      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
    }
    case AMDGPUIntrinsic::AMDGPU_dp4: {
      SDValue Args[8] = {
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(0, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(0, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(1, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(1, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(2, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(2, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(3, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(3, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8);
    }

    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default: return;
  case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
    return;
  case ISD::LOAD: {
    SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
    Results.push_back(SDValue(Node, 0));
    Results.push_back(SDValue(Node, 1));
    // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
    // function
    DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
    return;
  }
  case ISD::STORE:
    SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
    Results.push_back(SDValue(Node, 0));
    return;
  }
}

SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
  // On hw >= R700, COS/SIN input must be between -1 and 1.
  // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
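  // Concretely, with k = 1/(2*Pi) ~= 0.15915494309 as used below:
  //   FractPart = FRACT(x * k + 0.5)             // normalized to [0, 1)
  //   TrigVal   = COS_HW/SIN_HW(FractPart - 0.5) // operand in [-0.5, 0.5)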
  EVT VT = Op.getValueType();
  SDValue Arg = Op.getOperand(0);
  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
      DAG.getNode(ISD::FADD, SDLoc(Op), VT,
          DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
              DAG.getConstantFP(0.15915494309, MVT::f32)),
          DAG.getConstantFP(0.5, MVT::f32)));
  unsigned TrigNode;
  switch (Op.getOpcode()) {
  case ISD::FCOS:
    TrigNode = AMDGPUISD::COS_HW;
    break;
  case ISD::FSIN:
    TrigNode = AMDGPUISD::SIN_HW;
    break;
  default:
    llvm_unreachable("Wrong trig opcode");
  }
  SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
      DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
          DAG.getConstantFP(-0.5, MVT::f32)));
  if (Gen >= AMDGPUSubtarget::R700)
    return TrigVal;
  // On R600 hw, COS/SIN input must be between -Pi and Pi.
  return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
      DAG.getConstantFP(3.14159265359, MVT::f32));
}

SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
  return DAG.getNode(
      ISD::SETCC,
      SDLoc(Op),
      MVT::i1,
      Op, DAG.getConstantFP(0.0f, MVT::f32),
      DAG.getCondCode(ISD::SETNE)
      );
}

SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   SDLoc DL,
                                                   unsigned DwordOffset) const {
  unsigned ByteOffset = DwordOffset * 4;
  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                           AMDGPUAS::CONSTANT_BUFFER_0);

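  // Implicit parameters occupy the first nine dwords of CONSTANT_BUFFER_0;
  // explicit kernel arguments are laid out after them, starting at byte
  // offset 36 (see LowerFormalArguments).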
  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)),
                     false, false, false, 0);
}

SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL =
      static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());

  FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
  assert(FIN);

  unsigned FrameIndex = FIN->getIndex();
  unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
  return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
}

bool R600TargetLowering::isZero(SDValue Op) const {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
    return Cst->isNullValue();
  } else if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CstFP->isZero();
  } else {
    return false;
  }
}

SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_any
  // select_cc f32, f32, 1.0f, 0.0f, cc_any
  // select_cc i32, i32, -1, 0, cc_any
  //

  // Move hardware True/False values to the correct operand.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    std::swap(False, True);
    CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_any
  // select_cc f32, 0.0, i32, i32, cc_any
  // select_cc i32, 0, f32, f32, cc_any
  // select_cc i32, 0, i32, i32, cc_any
  //
  if (isZero(LHS) || isZero(RHS)) {
    SDValue Cond = (isZero(LHS) ? RHS : LHS);
    SDValue Zero = (isZero(LHS) ? LHS : RHS);
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }
    if (isZero(LHS)) {
      CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
    }

    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
    case ISD::SETULE:
    case ISD::SETULT:
    case ISD::SETOLE:
    case ISD::SETOLT:
    case ISD::SETLE:
    case ISD::SETLT:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // Possible Min/Max pattern
  SDValue MinMax = LowerMinMax(Op, DAG);
  if (MinMax.getNode()) {
    return MinMax;
  }

  // If we make it this far, it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  } else {
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}

SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  return DAG.getNode(ISD::SELECT_CC,
      SDLoc(Op),
      Op.getValueType(),
      Op.getOperand(0),
      DAG.getConstant(0, MVT::i32),
      Op.getOperand(1),
      Op.getOperand(2),
      DAG.getCondCode(ISD::SETNE));
}

/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
/// convert these pointers to a register index. Each register holds
/// 16 bytes (4 x 32-bit sub-registers), but we need to take into account the
/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
/// for indirect addressing.
SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
                                               unsigned StackWidth,
                                               SelectionDAG &DAG) const {
  unsigned SRLPad;
  switch(StackWidth) {
  case 1:
    SRLPad = 2;
    break;
  case 2:
    SRLPad = 3;
    break;
  case 4:
    SRLPad = 4;
    break;
  default: llvm_unreachable("Invalid stack width");
  }

Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000975 DAG.getConstant(SRLPad, MVT::i32));
976}
977
978void R600TargetLowering::getStackAddress(unsigned StackWidth,
979 unsigned ElemIdx,
980 unsigned &Channel,
981 unsigned &PtrIncr) const {
982 switch (StackWidth) {
983 default:
984 case 1:
985 Channel = 0;
986 if (ElemIdx > 0) {
987 PtrIncr = 1;
988 } else {
989 PtrIncr = 0;
990 }
991 break;
992 case 2:
993 Channel = ElemIdx % 2;
994 if (ElemIdx == 2) {
995 PtrIncr = 1;
996 } else {
997 PtrIncr = 0;
998 }
999 break;
1000 case 4:
1001 Channel = ElemIdx;
1002 PtrIncr = 0;
1003 break;
1004 }
1005}
1006
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Ptr = Op.getOperand(2);

  SDValue Result = AMDGPUTargetLowering::LowerVectorStore(Op, DAG);
  if (Result.getNode()) {
    return Result;
  }

  if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
    if (StoreNode->isTruncatingStore()) {
      EVT VT = Value.getValueType();
      assert(VT.bitsLE(MVT::i32));
      EVT MemVT = StoreNode->getMemoryVT();
      SDValue MaskConstant;
      if (MemVT == MVT::i8) {
        MaskConstant = DAG.getConstant(0xFF, MVT::i32);
      } else {
        assert(MemVT == MVT::i16);
        MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
      }
      SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
                                      DAG.getConstant(2, MVT::i32));
      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
                                      DAG.getConstant(0x00000003, VT));
      SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
      SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
                                  DAG.getConstant(3, VT));
      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
      // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
      // vector instead.
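      // The value/mask pair built below drives a read-modify-write of the
      // containing dword (conceptually dst = (dst & ~Mask) | ShiftedValue),
      // so the bytes around the stored i8/i16 are preserved.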
      SDValue Src[4] = {
        ShiftedValue,
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(0, MVT::i32),
        Mask
      };
      SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src, 4);
      SDValue Args[3] = { Chain, Input, DWordAddr };
      return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
                                     Op->getVTList(), Args, 3, MemVT,
                                     StoreNode->getMemOperand());
    } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
               Value.getValueType().bitsGE(MVT::i32)) {
      // Convert pointer from byte address to dword address.
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
                        DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
                                    Ptr, DAG.getConstant(2, MVT::i32)));

      if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
        assert(!"Truncated and indexed stores not supported yet");
      } else {
        Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
      }
      return Chain;
    }
  }

  EVT ValueVT = Value.getValueType();

  if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing

  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
      getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (ValueVT.isVector()) {
    unsigned NumElemVT = ValueVT.getVectorNumElements();
    EVT ElemVT = ValueVT.getVectorElementType();
    SDValue Stores[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
                                 Value, DAG.getConstant(i, MVT::i32));

      Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
                              Chain, Elem, Ptr,
                              DAG.getTargetConstant(Channel, MVT::i32));
    }
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
  } else {
    if (ValueVT == MVT::i8) {
      Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
    }
    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
        DAG.getTargetConstant(0, MVT::i32)); // Channel
  }

  return Chain;
}

// Returns the base const position of the given constant buffer:
// 512 + (kc_bank << 12), or -1 for non-constant address spaces.
static int
ConstantAddressBlock(unsigned AddressSpace) {
  switch (AddressSpace) {
  case AMDGPUAS::CONSTANT_BUFFER_0:
    return 512;
  case AMDGPUAS::CONSTANT_BUFFER_1:
    return 512 + 4096;
  case AMDGPUAS::CONSTANT_BUFFER_2:
    return 512 + 4096 * 2;
  case AMDGPUAS::CONSTANT_BUFFER_3:
    return 512 + 4096 * 3;
  case AMDGPUAS::CONSTANT_BUFFER_4:
    return 512 + 4096 * 4;
  case AMDGPUAS::CONSTANT_BUFFER_5:
    return 512 + 4096 * 5;
  case AMDGPUAS::CONSTANT_BUFFER_6:
    return 512 + 4096 * 6;
  case AMDGPUAS::CONSTANT_BUFFER_7:
    return 512 + 4096 * 7;
  case AMDGPUAS::CONSTANT_BUFFER_8:
    return 512 + 4096 * 8;
  case AMDGPUAS::CONSTANT_BUFFER_9:
    return 512 + 4096 * 9;
  case AMDGPUAS::CONSTANT_BUFFER_10:
    return 512 + 4096 * 10;
  case AMDGPUAS::CONSTANT_BUFFER_11:
    return 512 + 4096 * 11;
  case AMDGPUAS::CONSTANT_BUFFER_12:
    return 512 + 4096 * 12;
  case AMDGPUAS::CONSTANT_BUFFER_13:
    return 512 + 4096 * 13;
  case AMDGPUAS::CONSTANT_BUFFER_14:
    return 512 + 4096 * 14;
  case AMDGPUAS::CONSTANT_BUFFER_15:
    return 512 + 4096 * 15;
  default:
    return -1;
  }
}

SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1) {
    SDValue Result;
    if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
        dyn_cast<Constant>(LoadNode->getSrcValue()) ||
        dyn_cast<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want the const position encoded with the following formula:
        //   (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr, computed by LLVM using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan) * 4 here and
        // then divide by 4 at the ISel step.
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT, Slots, NumElements);
    } else {
      // A non-constant ptr can't be folded; keep it as a v4f32 load.
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
          DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  // For most operations, returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so
  // we need to manually expand loads that may be legal in some address spaces
  // and illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported
  // for compute shaders, since the data is sign-extended when it is uploaded
  // to the buffer. However, SEXT loads from other address spaces are not
  // supported, so we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    SDValue ShiftAmount =
        DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                     LoadNode->getPointerInfo(), MemVT,
                                     LoadNode->isVolatile(),
                                     LoadNode->isNonTemporal(),
                                     LoadNode->getAlignment());
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);

    SDValue MergedValues[2] = { Sra, Chain };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
      getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, MVT::i32),
                             Op.getOperand(2));
    }
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2];
  Ops[0] = LoweredLoad;
  Ops[1] = Chain;

  return DAG.getMergeValues(Ops, 2, DL);
}

/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      SDLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());

  AnalyzeFormalArguments(CCInfo, Ins);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    EVT VT = VA.getLocVT();

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // The first 36 bytes of the input buffer contain information about
    // thread group and global sizes.
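    // (Those 36 bytes are the nine implicit dwords read by
    // LowerImplicitParameter: ngroups, global size and local size, each for
    // x, y and z.)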
    SDValue Arg = DAG.getLoad(VT, DL, Chain,
                              DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
                              MachinePointerInfo(UndefValue::get(PtrTy)), false,
                              false, false, 4); // 4 is the preferred alignment for
                                                // the CONSTANT memory space.
    InVals.push_back(Arg);
  }
  return Chain;
}

EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
  if (!VT.isVector()) return MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}

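// Fold BUILD_VECTOR operands that are constant 0.0 or 1.0 into the SEL_0 /
// SEL_1 swizzle sources and deduplicate repeated operands, recording the
// old -> new lane mapping in RemapSwizzle.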
static SDValue
CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
                        DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };

  for (unsigned i = 0; i < 4; i++) {
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      continue;
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec, 4);
}

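// Try to move each operand that comes from an EXTRACT_VECTOR_ELT into the
// lane it is extracted from, so that the final swizzle is as close to the
// identity as possible; RemapSwizzle records the resulting permutation.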
Benjamin Kramer193960c2013-06-11 13:32:25 +00001362static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1363 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001364 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1365 assert(RemapSwizzle.empty());
1366 SDValue NewBldVec[4] = {
1367 VectorEntry.getOperand(0),
1368 VectorEntry.getOperand(1),
1369 VectorEntry.getOperand(2),
1370 VectorEntry.getOperand(3)
1371 };
1372 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001373 for (unsigned i = 0; i < 4; i++)
1374 RemapSwizzle[i] = i;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001375
1376 for (unsigned i = 0; i < 4; i++) {
1377 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1378 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1379 ->getZExtValue();
1380 if (!isUnmovable[Idx]) {
1381 // Swap i and Idx
1382 std::swap(NewBldVec[Idx], NewBldVec[i]);
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001383 std::swap(RemapSwizzle[RemapSwizzle[Idx]], RemapSwizzle[RemapSwizzle[i]]);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001384 }
1385 isUnmovable[Idx] = true;
1386 }
1387 }
1388
1389 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
1390 VectorEntry.getValueType(), NewBldVec, 4);
1391}
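
// A standalone sketch of the reorder bookkeeping above (illustrative code,
// not part of the backend): each extracted element is swapped into the lane
// matching its source index, and Remap records where each original lane
// ended up.
static void reorderSwizzleExample() {
  int SrcIdx[4] = {1, 0, -1, -1}; // extract indices per lane; -1 = other
  unsigned Remap[4] = {0, 1, 2, 3};
  bool Unmovable[4] = {false, false, false, false};
  for (unsigned i = 0; i < 4; ++i) {
    int Idx = SrcIdx[i];
    if (Idx < 0)
      continue;
    if (!Unmovable[Idx]) {
      std::swap(SrcIdx[Idx], SrcIdx[i]);
      std::swap(Remap[Remap[Idx]], Remap[Remap[i]]);
    }
    Unmovable[Idx] = true;
  }
  // SrcIdx becomes {0, 1, -1, -1} and Remap {1, 0, 2, 3}: both extracts now
  // sit in their natural lanes, and the swizzle fix-up in OptimizeSwizzle
  // redirects the old readers.
}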
1392
1393
1394SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1395SDValue Swz[4], SelectionDAG &DAG) const {
1396 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1397 // Old -> New swizzle values
1398 DenseMap<unsigned, unsigned> SwizzleRemap;
1399
1400 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1401 for (unsigned i = 0; i < 4; i++) {
1402 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
1403 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1404 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1405 }
1406
1407 SwizzleRemap.clear();
1408 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1409 for (unsigned i = 0; i < 4; i++) {
1410 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
1411 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1412 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1413 }
1414
1415 return BuildVector;
1416}
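
// Usage sketch (shorthand for what the EXPORT combine below does, not
// verbatim): the caller passes the four swizzle selectors that read the
// vector, and both remap tables are folded into them in place:
//   SDValue Swz[4] = { SWZ_X, SWZ_Y, SWZ_Z, SWZ_W }; // ConstantSDNodes
//   NewVector = OptimizeSwizzle(BuildVector, Swz, DAG);
// Selector values 0-3 name a lane of the vector; 4 and 5 name the hardware
// constants SEL_0 and SEL_1 assigned by CompactSwizzlableVector.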
1417
1418
Tom Stellard75aadc22012-12-11 21:25:42 +00001419//===----------------------------------------------------------------------===//
1420// Custom DAG Optimizations
1421//===----------------------------------------------------------------------===//
1422
1423SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1424 DAGCombinerInfo &DCI) const {
1425 SelectionDAG &DAG = DCI.DAG;
1426
1427 switch (N->getOpcode()) {
1428 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1429 case ISD::FP_ROUND: {
1430 SDValue Arg = N->getOperand(0);
1431 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001432 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001433 Arg.getOperand(0));
1434 }
1435 break;
1436 }
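
// Why the fold above is exact (assuming an i32 source, the common case on
// this target): i32 -> f64 conversion is lossless, so both
//   (f32 fp_round (f64 uint_to_fp a))  and  (f32 uint_to_fp a)
// round the integer value exactly once and produce the same f32.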
Tom Stellarde06163a2013-02-07 14:02:35 +00001437
1438 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0, cc))) ->
1439 // (i32 select_cc f32, f32, -1, 0 cc)
1440 //
1441 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1442 // this to one of the SET*_DX10 instructions.
1443 case ISD::FP_TO_SINT: {
1444 SDValue FNeg = N->getOperand(0);
1445 if (FNeg.getOpcode() != ISD::FNEG) {
1446 return SDValue();
1447 }
1448 SDValue SelectCC = FNeg.getOperand(0);
1449 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1450 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1451 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1452 !isHWTrueValue(SelectCC.getOperand(2)) ||
1453 !isHWFalseValue(SelectCC.getOperand(3))) {
1454 return SDValue();
1455 }
1456
Andrew Trickef9de2a2013-05-25 02:42:55 +00001457 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001458 SelectCC.getOperand(0), // LHS
1459 SelectCC.getOperand(1), // RHS
1460 DAG.getConstant(-1, MVT::i32), // True
1461 DAG.getConstant(0, MVT::i32), // False
1462 SelectCC.getOperand(4)); // CC
1465 }
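
// A hand-worked instance of the fold (shorthand): with a < b,
//   (i32 fp_to_sint (fneg (select_cc a, b, 1.0, 0.0, setlt)))
// evaluates to fp_to_sint(-1.0) = -1, and the replacement
//   (i32 select_cc a, b, -1, 0, setlt)
// also yields -1, matching the all-ones/zero results that the SET*_DX10
// instructions produce directly.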
Quentin Colombete2e05482013-07-30 00:27:16 +00001466
1467 // insert_vector_elt (build_vector elt0, ..., eltN), NewEltIdx, idx
1468 // => build_vector elt0, ..., NewEltIdx, ..., eltN
1469 case ISD::INSERT_VECTOR_ELT: {
1470 SDValue InVec = N->getOperand(0);
1471 SDValue InVal = N->getOperand(1);
1472 SDValue EltNo = N->getOperand(2);
1473 SDLoc dl(N);
1474
1475 // If the inserted element is an UNDEF, just use the input vector.
1476 if (InVal.getOpcode() == ISD::UNDEF)
1477 return InVec;
1478
1479 EVT VT = InVec.getValueType();
1480
1481 // If we can't generate a legal BUILD_VECTOR, exit
1482 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1483 return SDValue();
1484
1485 // Check that we know which element is being inserted
1486 if (!isa<ConstantSDNode>(EltNo))
1487 return SDValue();
1488 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1489
1490 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1491 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1492 // vector elements.
1493 SmallVector<SDValue, 8> Ops;
1494 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1495 Ops.append(InVec.getNode()->op_begin(),
1496 InVec.getNode()->op_end());
1497 } else if (InVec.getOpcode() == ISD::UNDEF) {
1498 unsigned NElts = VT.getVectorNumElements();
1499 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1500 } else {
1501 return SDValue();
1502 }
1503
1504 // Insert the element
1505 if (Elt < Ops.size()) {
1506 // All the operands of BUILD_VECTOR must have the same type;
1507 // we enforce that here.
1508 EVT OpVT = Ops[0].getValueType();
1509 if (InVal.getValueType() != OpVT)
1510 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1511 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
1512 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
1513 Ops[Elt] = InVal;
1514 }
1515
1516 // Return the new vector
1517 return DAG.getNode(ISD::BUILD_VECTOR, dl,
1518 VT, &Ops[0], Ops.size());
1519 }
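
// Shorthand example of the rewrite (hypothetical operands): given a legal
// v4i32 BUILD_VECTOR,
//   (insert_vector_elt (build_vector a, b, c, d), x, 2)
//     -> (build_vector a, b, x, d)
// so the element insert disappears into a plain vector rebuild.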
1520
Tom Stellard365366f2013-01-23 02:09:06 +00001521 // Extract_vec (Build_vector) generated by custom lowering
1522 // also needs to be custom combined
1523 case ISD::EXTRACT_VECTOR_ELT: {
1524 SDValue Arg = N->getOperand(0);
1525 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1526 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1527 unsigned Element = Const->getZExtValue();
1528 return Arg->getOperand(Element);
1529 }
1530 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001531 if (Arg.getOpcode() == ISD::BITCAST &&
1532 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1533 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1534 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001535 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00001536 Arg->getOperand(0).getOperand(Element));
1537 }
1538 }
break;
Tom Stellard365366f2013-01-23 02:09:06 +00001539 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001540
1541 case ISD::SELECT_CC: {
1542 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1543 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001544 //
1545 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1546 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001547 SDValue LHS = N->getOperand(0);
1548 if (LHS.getOpcode() != ISD::SELECT_CC) {
1549 return SDValue();
1550 }
1551
1552 SDValue RHS = N->getOperand(1);
1553 SDValue True = N->getOperand(2);
1554 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001555 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001556
1557 if (LHS.getOperand(2).getNode() != True.getNode() ||
1558 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00001559 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00001560 return SDValue();
1561 }
1562
Tom Stellard5e524892013-03-08 15:37:11 +00001563 switch (NCC) {
1564 default: return SDValue();
1565 case ISD::SETNE: return LHS;
1566 case ISD::SETEQ: {
1567 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1568 LHSCC = ISD::getSetCCInverse(LHSCC,
1569 LHS.getOperand(0).getValueType().isInteger());
Andrew Trickef9de2a2013-05-25 02:42:55 +00001570 return DAG.getSelectCC(SDLoc(N),
Tom Stellard5e524892013-03-08 15:37:11 +00001571 LHS.getOperand(0),
1572 LHS.getOperand(1),
1573 LHS.getOperand(2),
1574 LHS.getOperand(3),
1575 LHSCC);
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001576 }
Tom Stellard5e524892013-03-08 15:37:11 +00001577 }
1578 }
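
// A hand-worked instance (shorthand): let Inner = (select_cc x, y, a, b, setlt).
//   (select_cc Inner, b, a, b, seteq)
// asks "Inner == b", i.e. "not (x < y)", so it folds to
//   (select_cc x, y, a, b, setge)
// with the inverted condition, while the setne form asks "Inner != b",
// which is just Inner itself.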
Tom Stellardfbab8272013-08-16 01:12:11 +00001579
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001580 case AMDGPUISD::EXPORT: {
1581 SDValue Arg = N->getOperand(1);
1582 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1583 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001584
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001585 SDValue NewArgs[8] = {
1586 N->getOperand(0), // Chain
1587 SDValue(),
1588 N->getOperand(2), // ArrayBase
1589 N->getOperand(3), // Type
1590 N->getOperand(4), // SWZ_X
1591 N->getOperand(5), // SWZ_Y
1592 N->getOperand(6), // SWZ_Z
1593 N->getOperand(7) // SWZ_W
1594 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00001595 SDLoc DL(N);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001596 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001597 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
Tom Stellarde06163a2013-02-07 14:02:35 +00001598 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001599 case AMDGPUISD::TEXTURE_FETCH: {
1600 SDValue Arg = N->getOperand(1);
1601 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1602 break;
1603
1604 SDValue NewArgs[19] = {
1605 N->getOperand(0),
1606 N->getOperand(1),
1607 N->getOperand(2),
1608 N->getOperand(3),
1609 N->getOperand(4),
1610 N->getOperand(5),
1611 N->getOperand(6),
1612 N->getOperand(7),
1613 N->getOperand(8),
1614 N->getOperand(9),
1615 N->getOperand(10),
1616 N->getOperand(11),
1617 N->getOperand(12),
1618 N->getOperand(13),
1619 N->getOperand(14),
1620 N->getOperand(15),
1621 N->getOperand(16),
1622 N->getOperand(17),
1623 N->getOperand(18),
1624 };
1625 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
1626 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
1627 NewArgs, 19);
1628 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001629 }
1630 return SDValue();
1631}