//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Function.h"

using namespace llvm;

R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
  addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);

  computeRegisterProperties();

  setOperationAction(ISD::FCOS, MVT::f32, Custom);
  setOperationAction(ISD::FSIN, MVT::f32, Custom);

  setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
  setOperationAction(ISD::SETCC, MVT::v2i32, Expand);

  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);

  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
  setOperationAction(ISD::SELECT, MVT::v2f32, Expand);
  setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
  setOperationAction(ISD::SELECT, MVT::v4f32, Expand);

  // Legalize loads and stores to the private address space.
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
  setOperationAction(ISD::STORE, MVT::i8, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);
  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
  setTruncStoreAction(MVT::i32, MVT::i16, Custom);

  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);

  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::FP_TO_SINT);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::SELECT_CC);
  setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);

  setBooleanContents(ZeroOrNegativeOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
  setSchedulingPreference(Sched::Source);
}

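/// Expand pseudo-instructions that need a custom inserter: LDS reads that go
/// through the OQAP queue register, flag-carrying moves (clamp/abs/neg),
/// texture gradient sampling, branches, exports, and kernel returns.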
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    if (TII->get(MI->getOpcode()).TSFlags & R600_InstFlag::LDS_1A) {
      MachineInstrBuilder NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                                          TII->get(MI->getOpcode()),
                                          AMDGPU::OQAP);
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
      TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV,
                                   MI->getOperand(0).getReg(),
                                   AMDGPU::OQAP);
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
        MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO_INT)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if it's not the last one of its type.
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = llvm::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz) ? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  MI->eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

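/// Dispatch entry point for every operation the constructor marked Custom;
/// anything not handled here falls through to the generic AMDGPU lowering.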
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::FCOS:
  case ISD::FSIN: return LowerTrig(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: return LowerLOAD(Op, DAG);
  case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MFI->LiveOuts.push_back(Reg);
      return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
    }
    case AMDGPUIntrinsic::R600_store_swizzle: {
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, MVT::i32), // SWZ_X
        DAG.getConstant(1, MVT::i32), // SWZ_Y
        DAG.getConstant(2, MVT::i32), // SWZ_Z
        DAG.getConstant(3, MVT::i32)  // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(),
                         Args, 8);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch (IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      MRI.addLiveIn(Reg);
      return DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), Reg, VT);
    }

    case AMDGPUIntrinsic::R600_interp_input: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
      MachineSDNode *interp;
      if (ijb < 0) {
        const MachineFunction &MF = DAG.getMachineFunction();
        const R600InstrInfo *TII =
            static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
        interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
            MVT::v4f32, DAG.getTargetConstant(slot / 4, MVT::i32));
        return DAG.getTargetExtractSubreg(
            TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
            DL, MVT::f32, SDValue(interp, 0));
      }

      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
      unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
      MRI.addLiveIn(RegisterI);
      MRI.addLiveIn(RegisterJ);
      SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
      SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);

      if (slot % 4 < 2)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4, MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4, MVT::i32),
            RegisterJNode, RegisterINode);
      return SDValue(interp, slot % 2);
    }
    case AMDGPUIntrinsic::R600_tex:
    case AMDGPUIntrinsic::R600_texc:
    case AMDGPUIntrinsic::R600_txl:
    case AMDGPUIntrinsic::R600_txlc:
    case AMDGPUIntrinsic::R600_txb:
    case AMDGPUIntrinsic::R600_txbc:
    case AMDGPUIntrinsic::R600_txf:
    case AMDGPUIntrinsic::R600_txq:
    case AMDGPUIntrinsic::R600_ddx:
    case AMDGPUIntrinsic::R600_ddy: {
      unsigned TextureOp;
      switch (IntrinsicID) {
      case AMDGPUIntrinsic::R600_tex:
        TextureOp = 0;
        break;
      case AMDGPUIntrinsic::R600_texc:
        TextureOp = 1;
        break;
      case AMDGPUIntrinsic::R600_txl:
        TextureOp = 2;
        break;
      case AMDGPUIntrinsic::R600_txlc:
        TextureOp = 3;
        break;
      case AMDGPUIntrinsic::R600_txb:
        TextureOp = 4;
        break;
      case AMDGPUIntrinsic::R600_txbc:
        TextureOp = 5;
        break;
      case AMDGPUIntrinsic::R600_txf:
        TextureOp = 6;
        break;
      case AMDGPUIntrinsic::R600_txq:
        TextureOp = 7;
        break;
      case AMDGPUIntrinsic::R600_ddx:
        TextureOp = 8;
        break;
      case AMDGPUIntrinsic::R600_ddy:
        TextureOp = 9;
        break;
      default:
        llvm_unreachable("Unknown texture operation");
      }

      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
    }
    case AMDGPUIntrinsic::AMDGPU_dp4: {
      SDValue Args[8] = {
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(0, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(0, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(1, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(1, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(2, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(2, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(3, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(3, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8);
    }

    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default: return;
  case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
    return;
  case ISD::LOAD: {
    SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
    Results.push_back(SDValue(Node, 0));
    Results.push_back(SDValue(Node, 1));
    // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
    // function
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(Node, 1));
    return;
  }
  case ISD::STORE:
    SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
    Results.push_back(SDValue(Node, 0));
    return;
  }
}

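/// Range-reduce the operand of FCOS/FSIN before emitting the hardware trig
/// node: 0.15915494309 is 1/(2*Pi), so FRACT(x/(2*Pi) + 0.5) - 0.5 wraps the
/// argument into a single period centered on zero.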
SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
  // On hw >= R700, COS/SIN input must be between -1.0 and 1.0.
  // Thus we lower them to TRIG(FRACT(x / 2Pi + 0.5) - 0.5).
  EVT VT = Op.getValueType();
  SDValue Arg = Op.getOperand(0);
  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
      DAG.getNode(ISD::FADD, SDLoc(Op), VT,
        DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
          DAG.getConstantFP(0.15915494309, MVT::f32)),
        DAG.getConstantFP(0.5, MVT::f32)));
  unsigned TrigNode;
  switch (Op.getOpcode()) {
  case ISD::FCOS:
    TrigNode = AMDGPUISD::COS_HW;
    break;
  case ISD::FSIN:
    TrigNode = AMDGPUISD::SIN_HW;
    break;
  default:
    llvm_unreachable("Wrong trig opcode");
  }
  SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
      DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
        DAG.getConstantFP(-0.5, MVT::f32)));
  if (Gen >= AMDGPUSubtarget::R700)
    return TrigVal;
  // On R600 hw, COS/SIN input must be between -Pi and Pi.
  return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
      DAG.getConstantFP(3.14159265359, MVT::f32));
}

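/// FP_TO_UINT with an i1 result is lowered as a floating-point compare: the
/// boolean result is simply (Op != 0.0f).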
SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
  return DAG.getNode(
      ISD::SETCC,
      SDLoc(Op),
      MVT::i1,
      Op, DAG.getConstantFP(0.0f, MVT::f32),
      DAG.getCondCode(ISD::SETNE)
      );
}

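/// Implicit kernel parameters live at the start of constant buffer 0, one
/// dword each.  The intrinsic dispatch above maps them as:
///   0-2: ngroups.{x,y,z}  3-5: global_size.{x,y,z}  6-8: local_size.{x,y,z}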
SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   SDLoc DL,
                                                   unsigned DwordOffset) const {
  unsigned ByteOffset = DwordOffset * 4;
  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                           AMDGPUAS::CONSTANT_BUFFER_0);

  // We shouldn't be using an offset wider than 16 bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)),
                     false, false, false, 0);
}

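/// Frame indices are lowered to plain constants: private stack slots are
/// register-backed, so the index becomes a byte offset of 4 bytes per channel
/// times the configured stack width.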
SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {

  MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL =
   static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());

  FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
  assert(FIN);

  unsigned FrameIndex = FIN->getIndex();
  unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
  return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
}

bool R600TargetLowering::isZero(SDValue Op) const {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
    return Cst->isNullValue();
  } else if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CstFP->isZero();
  } else {
    return false;
  }
}

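/// Lower SELECT_CC to a native SET* or CND* instruction when one of the
/// patterns documented below matches, to a min/max when possible, and
/// otherwise to a pair of SELECT_CC nodes that the patterns can handle.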
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1,   0,    cc_any
  // select_cc f32, f32, 1.0f, 0.0f, cc_any
  // select_cc i32, i32, -1,   0,    cc_any
  //

  // Move hardware True/False values to the correct operand.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    std::swap(False, True);
    CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_any
  // select_cc f32, 0.0, i32, i32, cc_any
  // select_cc i32, 0,   f32, f32, cc_any
  // select_cc i32, 0,   i32, i32, cc_any
  //
  if (isZero(LHS) || isZero(RHS)) {
    SDValue Cond = (isZero(LHS) ? RHS : LHS);
    SDValue Zero = (isZero(LHS) ? LHS : RHS);
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types.  This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }
    if (isZero(LHS)) {
      CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
    }

    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
    case ISD::SETULE:
    case ISD::SETULT:
    case ISD::SETOLE:
    case ISD::SETOLT:
    case ISD::SETLE:
    case ISD::SETLT:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // Possible Min/Max pattern
  SDValue MinMax = LowerMinMax(Op, DAG);
  if (MinMax.getNode()) {
    return MinMax;
  }

  // If we make it this far, it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  } else {
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}

/// LLVM generates byte-addressed pointers.  For indirect addressing, we need to
/// convert these pointers to a register index.  Each register holds
/// 16 bytes (4 x 32-bit sub-registers), but we need to take into account the
/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
/// for indirect addressing.
SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
                                               unsigned StackWidth,
                                               SelectionDAG &DAG) const {
  unsigned SRLPad;
  switch (StackWidth) {
  case 1:
    SRLPad = 2;
    break;
  case 2:
    SRLPad = 3;
    break;
  case 4:
    SRLPad = 4;
    break;
  default: llvm_unreachable("Invalid stack width");
  }

  return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
                     DAG.getConstant(SRLPad, MVT::i32));
}

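/// Map element \p ElemIdx of an indirectly-addressed value onto a register
/// channel plus a pointer increment, given how many channels per register
/// (\p StackWidth) the frame uses.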
void R600TargetLowering::getStackAddress(unsigned StackWidth,
                                         unsigned ElemIdx,
                                         unsigned &Channel,
                                         unsigned &PtrIncr) const {
  switch (StackWidth) {
  default:
  case 1:
    Channel = 0;
    if (ElemIdx > 0) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 2:
    Channel = ElemIdx % 2;
    if (ElemIdx == 2) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 4:
    Channel = ElemIdx;
    PtrIncr = 0;
    break;
  }
}

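/// Custom store lowering.  Global truncating stores are emitted as STORE_MSKOR
/// read-modify-write operations (the byte or halfword is shifted into its lane
/// within the containing dword and paired with a matching mask), other global
/// stores are rewritten to dword addressing, and private stores are scalarized
/// into per-channel REGISTER_STORE nodes.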
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Ptr = Op.getOperand(2);

  SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
  if (Result.getNode()) {
    return Result;
  }

  if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
    if (StoreNode->isTruncatingStore()) {
      EVT VT = Value.getValueType();
      assert(VT.bitsLE(MVT::i32));
      EVT MemVT = StoreNode->getMemoryVT();
      SDValue MaskConstant;
      if (MemVT == MVT::i8) {
        MaskConstant = DAG.getConstant(0xFF, MVT::i32);
      } else {
        assert(MemVT == MVT::i16);
        MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
      }
      SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
                                      DAG.getConstant(2, MVT::i32));
      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
                                      DAG.getConstant(0x00000003, VT));
      SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
      SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
                                  DAG.getConstant(3, VT));
      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
      // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
      // vector instead.
      SDValue Src[4] = {
        ShiftedValue,
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(0, MVT::i32),
        Mask
      };
      SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src, 4);
      SDValue Args[3] = { Chain, Input, DWordAddr };
      return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
                                     Op->getVTList(), Args, 3, MemVT,
                                     StoreNode->getMemOperand());
    } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
               Value.getValueType().bitsGE(MVT::i32)) {
      // Convert pointer from byte address to dword address.
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
                        DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
                                    Ptr, DAG.getConstant(2, MVT::i32)));

      if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
        assert(!"Truncated and indexed stores not supported yet");
      } else {
        Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
      }
      return Chain;
    }
  }

  EVT ValueVT = Value.getValueType();

  if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing

  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (ValueVT.isVector()) {
    unsigned NumElemVT = ValueVT.getVectorNumElements();
    EVT ElemVT = ValueVT.getVectorElementType();
    SDValue Stores[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
                                 Value, DAG.getConstant(i, MVT::i32));

      Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
                              Chain, Elem, Ptr,
                              DAG.getTargetConstant(Channel, MVT::i32));
    }
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
  } else {
    if (ValueVT == MVT::i8) {
      Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
    }
    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
        DAG.getTargetConstant(0, MVT::i32)); // Channel
  }

  return Chain;
}

// return (512 + (kc_bank << 12))
static int
ConstantAddressBlock(unsigned AddressSpace) {
  switch (AddressSpace) {
  case AMDGPUAS::CONSTANT_BUFFER_0:
    return 512;
  case AMDGPUAS::CONSTANT_BUFFER_1:
    return 512 + 4096;
  case AMDGPUAS::CONSTANT_BUFFER_2:
    return 512 + 4096 * 2;
  case AMDGPUAS::CONSTANT_BUFFER_3:
    return 512 + 4096 * 3;
  case AMDGPUAS::CONSTANT_BUFFER_4:
    return 512 + 4096 * 4;
  case AMDGPUAS::CONSTANT_BUFFER_5:
    return 512 + 4096 * 5;
  case AMDGPUAS::CONSTANT_BUFFER_6:
    return 512 + 4096 * 6;
  case AMDGPUAS::CONSTANT_BUFFER_7:
    return 512 + 4096 * 7;
  case AMDGPUAS::CONSTANT_BUFFER_8:
    return 512 + 4096 * 8;
  case AMDGPUAS::CONSTANT_BUFFER_9:
    return 512 + 4096 * 9;
  case AMDGPUAS::CONSTANT_BUFFER_10:
    return 512 + 4096 * 10;
  case AMDGPUAS::CONSTANT_BUFFER_11:
    return 512 + 4096 * 11;
  case AMDGPUAS::CONSTANT_BUFFER_12:
    return 512 + 4096 * 12;
  case AMDGPUAS::CONSTANT_BUFFER_13:
    return 512 + 4096 * 13;
  case AMDGPUAS::CONSTANT_BUFFER_14:
    return 512 + 4096 * 14;
  case AMDGPUAS::CONSTANT_BUFFER_15:
    return 512 + 4096 * 15;
  default:
    return -1;
  }
}

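/// Custom load lowering.  Vector loads from local memory are split in two,
/// loads from the constant buffers become CONST_ADDRESS nodes (folded to
/// kcache slots when the pointer is a compile-time constant), SEXT loads are
/// expanded to an extload plus a shl/sra pair, and private loads become
/// per-channel REGISTER_LOAD nodes.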
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      SplitVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1) {
    SDValue Result;
    if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
        dyn_cast<Constant>(LoadNode->getSrcValue()) ||
        dyn_cast<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT, Slots, NumElements);
    } else {
      // A non-constant pointer can't be folded; keep it as a v4f32 load.
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
          DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  // For most operations, returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so
  // we need to manually expand loads that may be legal in some address spaces
  // and illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported
  // for compute shaders, since the data is sign extended when it is uploaded
  // to the buffer. However, SEXT loads from other address spaces are not
  // supported, so we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    SDValue ShiftAmount =
        DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                     LoadNode->getPointerInfo(), MemVT,
                                     LoadNode->isVolatile(),
                                     LoadNode->isNonTemporal(),
                                     LoadNode->getAlignment());
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);

    SDValue MergedValues[2] = { Sra, Chain };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, MVT::i32),
                             Op.getOperand(2));
    }
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2];
  Ops[0] = LoweredLoad;
  Ops[1] = Chain;

  return DAG.getMergeValues(Ops, 2, DL);
}

/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      SDLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());

  AnalyzeFormalArguments(CCInfo, Ins);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    EVT VT = VA.getLocVT();

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // The first 36 bytes of the input buffer contain information about
    // thread group and global sizes.
    SDValue Arg = DAG.getLoad(VT, DL, Chain,
                           DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
                           MachinePointerInfo(UndefValue::get(PtrTy)), false,
                           false, false, 4); // 4 is the preferred alignment for
                                             // the CONSTANT memory space.
    InVals.push_back(Arg);
  }
  return Chain;
}

EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
  if (!VT.isVector()) return MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}

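/// Fold BUILD_VECTOR elements that an R600 swizzle can encode directly:
/// constant 0.0 becomes SEL_0, constant 1.0 becomes SEL_1, and a repeated
/// element becomes a reference to its first occurrence.  The old -> new lane
/// mapping is recorded in \p RemapSwizzle.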
static SDValue
CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
                        DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
      VectorEntry.getOperand(0),
      VectorEntry.getOperand(1),
      VectorEntry.getOperand(2),
      VectorEntry.getOperand(3)
  };

  for (unsigned i = 0; i < 4; i++) {
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      continue;
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
      VectorEntry.getValueType(), NewBldVec, 4);
}

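/// Move each EXTRACT_VECTOR_ELT operand of the BUILD_VECTOR back to the lane
/// it was extracted from, when that lane is still free, bringing the final
/// swizzle closer to the identity mapping.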
Benjamin Kramer193960c2013-06-11 13:32:25 +00001354static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1355 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001356 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1357 assert(RemapSwizzle.empty());
1358 SDValue NewBldVec[4] = {
1359 VectorEntry.getOperand(0),
1360 VectorEntry.getOperand(1),
1361 VectorEntry.getOperand(2),
1362 VectorEntry.getOperand(3)
1363 };
1364 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001365 for (unsigned i = 0; i < 4; i++)
1366 RemapSwizzle[i] = i;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001367
  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      // The extract index must be constant here; use cast<> to assert that
      // instead of dereferencing a possibly-null dyn_cast<> result.
      unsigned Idx = cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
                         ->getZExtValue();
      if (!isUnmovable[Idx]) {
        // Swap i and Idx
        std::swap(NewBldVec[Idx], NewBldVec[i]);
        std::swap(RemapSwizzle[RemapSwizzle[Idx]], RemapSwizzle[RemapSwizzle[i]]);
      }
      isUnmovable[Idx] = true;
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec, 4);
}

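// Optimize the BUILD_VECTOR feeding a swizzled source together with its four
// swizzle operands Swz: first fold constant and duplicated lanes
// (CompactSwizzlableVector), then reorder lanes toward the identity swizzle
// (ReorganizeVector), rewriting Swz through each pass's old -> new remap
// table.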
SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
                                            SDValue Swz[4],
                                            SelectionDAG &DAG) const {
  assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
  // Old -> New swizzle values
  DenseMap<unsigned, unsigned> SwizzleRemap;

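  // First pass: fold constant and duplicated lanes, then remap the swizzle
  // selectors that referred to the folded lanes.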
  BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
  for (unsigned i = 0; i < 4; i++) {
    unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
    if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
      Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
  }

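  // Second pass: reorder the remaining lanes and remap the selectors again.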
  SwizzleRemap.clear();
  BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
  for (unsigned i = 0; i < 4; i++) {
    unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
    if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
      Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
  }

  return BuildVector;
}

//===----------------------------------------------------------------------===//
// Custom DAG Optimizations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
  case ISD::FP_ROUND: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
      return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
                         Arg.getOperand(0));
    }
    break;
  }

  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
  // (i32 select_cc f32, f32, -1, 0 cc)
  //
  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
  // this to one of the SET*_DX10 instructions.
  case ISD::FP_TO_SINT: {
    SDValue FNeg = N->getOperand(0);
    if (FNeg.getOpcode() != ISD::FNEG) {
      return SDValue();
    }
    SDValue SelectCC = FNeg.getOperand(0);
    if (SelectCC.getOpcode() != ISD::SELECT_CC ||
        SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
        SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
        !isHWTrueValue(SelectCC.getOperand(2)) ||
        !isHWFalseValue(SelectCC.getOperand(3))) {
      return SDValue();
    }

    return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
                       SelectCC.getOperand(0), // LHS
                       SelectCC.getOperand(1), // RHS
                       DAG.getConstant(-1, MVT::i32), // True
                       DAG.getConstant(0, MVT::i32),  // False
                       SelectCC.getOperand(4)); // CC
  }

  // insert_vector_elt (build_vector elt0, ..., eltN), NewEltIdx, idx
  // => build_vector elt0, ..., NewEltIdx, ..., eltN
  case ISD::INSERT_VECTOR_ELT: {
    SDValue InVec = N->getOperand(0);
    SDValue InVal = N->getOperand(1);
    SDValue EltNo = N->getOperand(2);
    SDLoc dl(N);

    // If the inserted element is an UNDEF, just use the input vector.
    if (InVal.getOpcode() == ISD::UNDEF)
      return InVec;

    EVT VT = InVec.getValueType();

    // If we can't generate a legal BUILD_VECTOR, exit
    if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
      return SDValue();

    // Check that we know which element is being inserted
    if (!isa<ConstantSDNode>(EltNo))
      return SDValue();
    unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

    // Check that the operand is a BUILD_VECTOR (or UNDEF, which can
    // essentially be converted to a BUILD_VECTOR). Fill in the Ops vector
    // with the vector elements.
    SmallVector<SDValue, 8> Ops;
    if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
      Ops.append(InVec.getNode()->op_begin(),
                 InVec.getNode()->op_end());
    } else if (InVec.getOpcode() == ISD::UNDEF) {
      unsigned NElts = VT.getVectorNumElements();
      Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
    } else {
      return SDValue();
    }

    // Insert the element
    if (Elt < Ops.size()) {
      // All the operands of BUILD_VECTOR must have the same type;
      // we enforce that here.
      EVT OpVT = Ops[0].getValueType();
      if (InVal.getValueType() != OpVT)
        InVal = OpVT.bitsGT(InVal.getValueType()) ?
            DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
            DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
      Ops[Elt] = InVal;
    }

    // Return the new vector
    return DAG.getNode(ISD::BUILD_VECTOR, dl,
                       VT, &Ops[0], Ops.size());
  }

  // Extract_vec (Build_vector) generated by custom lowering
  // also needs to be custom combined.
  case ISD::EXTRACT_VECTOR_ELT: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return Arg->getOperand(Element);
      }
    }
    if (Arg.getOpcode() == ISD::BITCAST &&
        Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
                           Arg->getOperand(0).getOperand(Element));
      }
    }
    // Without this break we would fall through into the SELECT_CC case,
    // which reads operands this node does not have.
    break;
  }

  case ISD::SELECT_CC: {
    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
    // selectcc x, y, a, b, inv(cc)
    //
    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
    // selectcc x, y, a, b, cc
    SDValue LHS = N->getOperand(0);
    if (LHS.getOpcode() != ISD::SELECT_CC) {
      return SDValue();
    }

    SDValue RHS = N->getOperand(1);
    SDValue True = N->getOperand(2);
    SDValue False = N->getOperand(3);
    ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();

    if (LHS.getOperand(2).getNode() != True.getNode() ||
        LHS.getOperand(3).getNode() != False.getNode() ||
        RHS.getNode() != False.getNode()) {
      return SDValue();
    }

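    // At this point the inner and outer select_ccs share the same true/false
    // values, so the outer node reduces to the inner one (setne) or to the
    // inner one with an inverted condition (seteq).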
    switch (NCC) {
    default: return SDValue();
    case ISD::SETNE: return LHS;
    case ISD::SETEQ: {
      ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
      LHSCC = ISD::getSetCCInverse(LHSCC,
                                  LHS.getOperand(0).getValueType().isInteger());
      return DAG.getSelectCC(SDLoc(N),
                             LHS.getOperand(0),
                             LHS.getOperand(1),
                             LHS.getOperand(2),
                             LHS.getOperand(3),
                             LHSCC);
    }
    }
  }

  case AMDGPUISD::EXPORT: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

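    // Rebuild the export around an optimized source vector; operands 4-7 are
    // the swizzle selectors that OptimizeSwizzle may rewrite in place.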
    SDValue NewArgs[8] = {
      N->getOperand(0), // Chain
      SDValue(),
      N->getOperand(2), // ArrayBase
      N->getOperand(3), // Type
      N->getOperand(4), // SWZ_X
      N->getOperand(5), // SWZ_Y
      N->getOperand(6), // SWZ_Z
      N->getOperand(7)  // SWZ_W
    };
    SDLoc DL(N);
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
    return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
  }
  case AMDGPUISD::TEXTURE_FETCH: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

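    // Same idea as the EXPORT combine: operand 1 is the coordinate
    // build_vector, operands 2-5 are the four swizzle selectors handed to
    // OptimizeSwizzle, and the remaining operands are passed through
    // unchanged.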
    SDValue NewArgs[19] = {
      N->getOperand(0),
      N->getOperand(1),
      N->getOperand(2),
      N->getOperand(3),
      N->getOperand(4),
      N->getOperand(5),
      N->getOperand(6),
      N->getOperand(7),
      N->getOperand(8),
      N->getOperand(9),
      N->getOperand(10),
      N->getOperand(11),
      N->getOperand(12),
      N->getOperand(13),
      N->getOperand(14),
      N->getOperand(15),
      N->getOperand(16),
      N->getOperand(17),
      N->getOperand(18)
    };
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
    return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
                       NewArgs, 19);
  }
  }
  return SDValue();
}