blob: f0242b86c0b724f507968c1f993bdddae641a7f4 [file] [log] [blame]
Tom Stellard75aadc22012-12-11 21:25:42 +00001//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Custom DAG lowering for R600
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
16#include "R600Defines.h"
17#include "R600InstrInfo.h"
18#include "R600MachineFunctionInfo.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000019#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000020#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000021#include "llvm/CodeGen/MachineInstrBuilder.h"
22#include "llvm/CodeGen/MachineRegisterInfo.h"
23#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000024#include "llvm/IR/Argument.h"
25#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000026
27using namespace llvm;
28
29R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000030 AMDGPUTargetLowering(TM),
31 Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000032 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
33 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
34 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
35 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000036 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
37 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
38
Tom Stellard75aadc22012-12-11 21:25:42 +000039 computeRegisterProperties();
40
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000041 setOperationAction(ISD::FCOS, MVT::f32, Custom);
42 setOperationAction(ISD::FSIN, MVT::f32, Custom);
43
Tom Stellard75aadc22012-12-11 21:25:42 +000044 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000045 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000046
Tom Stellard492ebea2013-03-08 15:37:07 +000047 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
48 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000049
50 setOperationAction(ISD::FSUB, MVT::f32, Expand);
51
52 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
53 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
54 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000055
Tom Stellard75aadc22012-12-11 21:25:42 +000056 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
57 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
58
Tom Stellarde8f9f282013-03-08 15:37:05 +000059 setOperationAction(ISD::SETCC, MVT::i32, Expand);
60 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000061 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
62
63 setOperationAction(ISD::SELECT, MVT::i32, Custom);
64 setOperationAction(ISD::SELECT, MVT::f32, Custom);
65
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000066 // Legalize loads and stores to the private address space.
67 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +000068 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000069 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellard1e803092013-07-23 01:48:18 +000070 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
71 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
72 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
73 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000074 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000075 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +000076 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000077 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
Tom Stellardd3ee8c12013-08-16 01:12:06 +000078 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
79 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000080
Tom Stellard365366f2013-01-23 02:09:06 +000081 setOperationAction(ISD::LOAD, MVT::i32, Custom);
82 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000083 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
84
Tom Stellard75aadc22012-12-11 21:25:42 +000085 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +000086 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +000087 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +000088 setTargetDAGCombine(ISD::SELECT_CC);
Quentin Colombete2e05482013-07-30 00:27:16 +000089 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +000090
Michel Danzer49812b52013-07-10 16:37:07 +000091 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
92
Tom Stellardb852af52013-03-08 15:37:03 +000093 setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard87047f62013-04-24 23:56:18 +000094 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
Tom Stellardfc455472013-08-12 22:33:21 +000095 setSchedulingPreference(Sched::Source);
Tom Stellard75aadc22012-12-11 21:25:42 +000096}
97
/// \brief Expand pseudo instructions that are flagged usesCustomInserter.
///
/// Replaces the pseudo \p MI with real R600 machine instructions inserted
/// into \p BB at MI's position, then erases the pseudo.  A few cases return
/// \p BB early WITHOUT erasing MI: RETURN (which only gains implicit uses),
/// unknown opcodes delegated to the superclass, and exports that are left
/// in place because a later export of the same type exists.
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
          static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // LDS instructions with a single address operand implicitly write OQAP:
    // rebuild the instruction with OQAP as the explicit def, then copy OQAP
    // into the pseudo's original destination register.
    if (TII->get(MI->getOpcode()).TSFlags & R600_InstFlag::LDS_1A) {
      MachineInstrBuilder NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                                          TII->get(MI->getOpcode()),
                                          AMDGPU::OQAP);
      // Copy every operand except the def (operand 0) onto the new
      // instruction.
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
      TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV,
                                   MI->getOperand(0).getReg(),
                                   AMDGPU::OQAP);
    } else {
      // Not an LDS_1A instruction; let the AMDGPU superclass handle it.
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  case AMDGPU::CLAMP_R600: {
    // Lower to a plain MOV carrying the clamp output modifier flag.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                   AMDGPU::MOV,
                                                   MI->getOperand(0).getReg(),
                                                   MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    // Lower to a MOV with the absolute-value source modifier flag.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    // Lower to a MOV with the negate source modifier flag.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    // Do not emit anything; instead, set the write-mask flag on the
    // instruction that defines the masked register.
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    // Materialize a float immediate via its raw bit pattern.
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    // Copy from the constant cache: a MOV from ALU_CONST with the constant
    // slot encoded in the src0_sel immediate operand.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
        MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    // If the next instruction is a RETURN, fold the end-of-program bit into
    // this write so no separate terminator export is needed.
    unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    // Texture sample with user-supplied derivatives: emit SET_GRADIENTS_H/V
    // into two fresh 128-bit temps, then the gradient sample reading both.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    // Default source swizzle (X,Y,Z,W) and coordinate-type flags
    // (1 = normalized); adjusted per texture target below.
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            // Keep the gradient-setting instructions ordered before the
            // sample by making their defs implicit uses here.
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    // Shadow-comparison variant of TXD; identical setup, but the final
    // sample uses TEX_SAMPLE_C_G.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    // Unconditional branch lowers directly to JUMP.
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    // Float condition: set PREDICATE_BIT via PRED_X (non-zero test), then
    // emit a predicated jump that kills the predicate bit.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    // Integer condition: same as above but using the integer non-zero test.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
            AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO_INT)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
           .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if its not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    // Scan the rest of the block for a later export of the same type.
    for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = llvm::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
    // Only the last export of each type (or the end-of-program export)
    // needs rewriting; otherwise keep MI as-is.
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // CF instruction encoding differs between Evergreen (84) and R600 (40).
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  // All non-early-return cases replaced MI; remove the pseudo.
  MI->eraseFromParent();
  return BB;
}
470
471//===----------------------------------------------------------------------===//
472// Custom DAG Lowering Operations
473//===----------------------------------------------------------------------===//
474
/// \brief Custom-lower the DAG operations registered as Custom in the
/// constructor.
///
/// Simple opcodes dispatch straight to their Lower* helper.  INTRINSIC_VOID
/// and INTRINSIC_WO_CHAIN carry a nested switch over the intrinsic ID;
/// unhandled intrinsics fall through (returning an empty SDValue or
/// delegating to the AMDGPU superclass, matching the original control flow).
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::FCOS:
  case ISD::FSIN: return LowerTrig(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::SELECT: return LowerSELECT(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: return LowerLOAD(Op, DAG);
  case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      // Record the target register as a live-out (picked up by the RETURN
      // custom inserter) and copy the value into it.
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MFI->LiveOuts.push_back(Reg);
      return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
    }
    case AMDGPUIntrinsic::R600_store_swizzle: {
      // Build an EXPORT node with the identity swizzle (X,Y,Z,W).
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, MVT::i32), // SWZ_X
        DAG.getConstant(1, MVT::i32), // SWZ_Y
        DAG.getConstant(2, MVT::i32), // SWZ_Z
        DAG.getConstant(3, MVT::i32) // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(),
          Args, 8);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch(IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      // Shader input: mark the corresponding T-register live-in and read it
      // at the function entry.
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      MRI.addLiveIn(Reg);
      return DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), Reg, VT);
    }

    case AMDGPUIntrinsic::R600_interp_input: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
      MachineSDNode *interp;
      if (ijb < 0) {
        // Negative barycentric index: constant (flat) interpolation; load
        // the vector and extract the requested channel.
        const MachineFunction &MF = DAG.getMachineFunction();
        const R600InstrInfo *TII =
          static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
        interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
            MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
        return DAG.getTargetExtractSubreg(
            TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
            DL, MVT::f32, SDValue(interp, 0));
      }

      // The I/J barycentric coordinates arrive in a pair of adjacent
      // T-registers; mark them live-in and read them at entry.
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
      unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
      MRI.addLiveIn(RegisterI);
      MRI.addLiveIn(RegisterJ);
      SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
      SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);

      // Each INTERP_PAIR instruction produces two channels (XY or ZW);
      // pick the pair by slot, then the result index by parity.
      if (slot % 4 < 2)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            RegisterJNode, RegisterINode);
      return SDValue(interp, slot % 2);
    }
    case AMDGPUIntrinsic::R600_tex:
    case AMDGPUIntrinsic::R600_texc:
    case AMDGPUIntrinsic::R600_txl:
    case AMDGPUIntrinsic::R600_txlc:
    case AMDGPUIntrinsic::R600_txb:
    case AMDGPUIntrinsic::R600_txbc:
    case AMDGPUIntrinsic::R600_txf:
    case AMDGPUIntrinsic::R600_txq:
    case AMDGPUIntrinsic::R600_ddx:
    case AMDGPUIntrinsic::R600_ddy: {
      // Map each texture intrinsic to its TEXTURE_FETCH opcode selector.
      unsigned TextureOp;
      switch (IntrinsicID) {
      case AMDGPUIntrinsic::R600_tex:
        TextureOp = 0;
        break;
      case AMDGPUIntrinsic::R600_texc:
        TextureOp = 1;
        break;
      case AMDGPUIntrinsic::R600_txl:
        TextureOp = 2;
        break;
      case AMDGPUIntrinsic::R600_txlc:
        TextureOp = 3;
        break;
      case AMDGPUIntrinsic::R600_txb:
        TextureOp = 4;
        break;
      case AMDGPUIntrinsic::R600_txbc:
        TextureOp = 5;
        break;
      case AMDGPUIntrinsic::R600_txf:
        TextureOp = 6;
        break;
      case AMDGPUIntrinsic::R600_txq:
        TextureOp = 7;
        break;
      case AMDGPUIntrinsic::R600_ddx:
        TextureOp = 8;
        break;
      case AMDGPUIntrinsic::R600_ddy:
        TextureOp = 9;
        break;
      default:
        llvm_unreachable("Unknow Texture Operation");
      }

      // Operand layout for AMDGPUISD::TEXTURE_FETCH: opcode selector,
      // coordinate, offsets, swizzles, resource/sampler ids, coord types.
      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
    }
    case AMDGPUIntrinsic::AMDGPU_dp4: {
      // Four-component dot product: extract the eight scalar operands and
      // feed them to the DOT4 node in interleaved (a[i], b[i]) order.
      SDValue Args[8] = {
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(0, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(0, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(1, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(1, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(2, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(2, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(3, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(3, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8);
    }

    // Work-group / grid dimensions live in the implicit-parameter buffer at
    // fixed dword offsets.
    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    // Work-group and work-item ids arrive pre-loaded in fixed T-registers.
    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}
708
709void R600TargetLowering::ReplaceNodeResults(SDNode *N,
710 SmallVectorImpl<SDValue> &Results,
711 SelectionDAG &DAG) const {
712 switch (N->getOpcode()) {
713 default: return;
714 case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
Tom Stellard365366f2013-01-23 02:09:06 +0000715 return;
716 case ISD::LOAD: {
717 SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
718 Results.push_back(SDValue(Node, 0));
719 Results.push_back(SDValue(Node, 1));
720 // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
721 // function
722 DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
723 return;
724 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000725 case ISD::STORE:
726 SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
727 Results.push_back(SDValue(Node, 0));
728 return;
Tom Stellard75aadc22012-12-11 21:25:42 +0000729 }
730}
731
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000732SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
733 // On hw >= R700, COS/SIN input must be between -1. and 1.
734 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
735 EVT VT = Op.getValueType();
736 SDValue Arg = Op.getOperand(0);
737 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
738 DAG.getNode(ISD::FADD, SDLoc(Op), VT,
739 DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
740 DAG.getConstantFP(0.15915494309, MVT::f32)),
741 DAG.getConstantFP(0.5, MVT::f32)));
742 unsigned TrigNode;
743 switch (Op.getOpcode()) {
744 case ISD::FCOS:
745 TrigNode = AMDGPUISD::COS_HW;
746 break;
747 case ISD::FSIN:
748 TrigNode = AMDGPUISD::SIN_HW;
749 break;
750 default:
751 llvm_unreachable("Wrong trig opcode");
752 }
753 SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
754 DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
755 DAG.getConstantFP(-0.5, MVT::f32)));
756 if (Gen >= AMDGPUSubtarget::R700)
757 return TrigVal;
758 // On R600 hw, COS/SIN input must be between -Pi and Pi.
759 return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
760 DAG.getConstantFP(3.14159265359, MVT::f32));
761}
762
Tom Stellard75aadc22012-12-11 21:25:42 +0000763SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
764 return DAG.getNode(
765 ISD::SETCC,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000766 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +0000767 MVT::i1,
768 Op, DAG.getConstantFP(0.0f, MVT::f32),
769 DAG.getCondCode(ISD::SETNE)
770 );
771}
772
Tom Stellard75aadc22012-12-11 21:25:42 +0000773SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000774 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +0000775 unsigned DwordOffset) const {
776 unsigned ByteOffset = DwordOffset * 4;
777 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +0000778 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +0000779
780 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
781 assert(isInt<16>(ByteOffset));
782
783 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
784 DAG.getConstant(ByteOffset, MVT::i32), // PTR
785 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
786 false, false, false, 0);
787}
788
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000789SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
790
791 MachineFunction &MF = DAG.getMachineFunction();
792 const AMDGPUFrameLowering *TFL =
793 static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
794
795 FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
796 assert(FIN);
797
798 unsigned FrameIndex = FIN->getIndex();
799 unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
800 return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
801}
802
Tom Stellard75aadc22012-12-11 21:25:42 +0000803bool R600TargetLowering::isZero(SDValue Op) const {
804 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
805 return Cst->isNullValue();
806 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
807 return CstFP->isZero();
808 } else {
809 return false;
810 }
811}
812
813SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +0000814 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000815 EVT VT = Op.getValueType();
816
817 SDValue LHS = Op.getOperand(0);
818 SDValue RHS = Op.getOperand(1);
819 SDValue True = Op.getOperand(2);
820 SDValue False = Op.getOperand(3);
821 SDValue CC = Op.getOperand(4);
822 SDValue Temp;
823
824 // LHS and RHS are guaranteed to be the same value type
825 EVT CompareVT = LHS.getValueType();
826
827 // Check if we can lower this to a native operation.
828
Tom Stellard2add82d2013-03-08 15:37:09 +0000829 // Try to lower to a SET* instruction:
830 //
831 // SET* can match the following patterns:
832 //
833 // select_cc f32, f32, -1, 0, cc_any
834 // select_cc f32, f32, 1.0f, 0.0f, cc_any
835 // select_cc i32, i32, -1, 0, cc_any
836 //
837
838 // Move hardware True/False values to the correct operand.
839 if (isHWTrueValue(False) && isHWFalseValue(True)) {
840 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
841 std::swap(False, True);
842 CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
843 }
844
845 if (isHWTrueValue(True) && isHWFalseValue(False) &&
846 (CompareVT == VT || VT == MVT::i32)) {
847 // This can be matched by a SET* instruction.
848 return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
849 }
850
Tom Stellard75aadc22012-12-11 21:25:42 +0000851 // Try to lower to a CND* instruction:
Tom Stellard2add82d2013-03-08 15:37:09 +0000852 //
853 // CND* can match the following patterns:
854 //
855 // select_cc f32, 0.0, f32, f32, cc_any
856 // select_cc f32, 0.0, i32, i32, cc_any
857 // select_cc i32, 0, f32, f32, cc_any
858 // select_cc i32, 0, i32, i32, cc_any
859 //
Tom Stellard75aadc22012-12-11 21:25:42 +0000860 if (isZero(LHS) || isZero(RHS)) {
861 SDValue Cond = (isZero(LHS) ? RHS : LHS);
862 SDValue Zero = (isZero(LHS) ? LHS : RHS);
863 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
864 if (CompareVT != VT) {
865 // Bitcast True / False to the correct types. This will end up being
866 // a nop, but it allows us to define only a single pattern in the
867 // .TD files for each CND* instruction rather than having to have
868 // one pattern for integer True/False and one for fp True/False
869 True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
870 False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
871 }
872 if (isZero(LHS)) {
873 CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
874 }
875
876 switch (CCOpcode) {
877 case ISD::SETONE:
878 case ISD::SETUNE:
879 case ISD::SETNE:
880 case ISD::SETULE:
881 case ISD::SETULT:
882 case ISD::SETOLE:
883 case ISD::SETOLT:
884 case ISD::SETLE:
885 case ISD::SETLT:
886 CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
887 Temp = True;
888 True = False;
889 False = Temp;
890 break;
891 default:
892 break;
893 }
894 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
895 Cond, Zero,
896 True, False,
897 DAG.getCondCode(CCOpcode));
898 return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
899 }
900
Tom Stellard75aadc22012-12-11 21:25:42 +0000901
902 // Possible Min/Max pattern
903 SDValue MinMax = LowerMinMax(Op, DAG);
904 if (MinMax.getNode()) {
905 return MinMax;
906 }
907
908 // If we make it this for it means we have no native instructions to handle
909 // this SELECT_CC, so we must lower it.
910 SDValue HWTrue, HWFalse;
911
912 if (CompareVT == MVT::f32) {
913 HWTrue = DAG.getConstantFP(1.0f, CompareVT);
914 HWFalse = DAG.getConstantFP(0.0f, CompareVT);
915 } else if (CompareVT == MVT::i32) {
916 HWTrue = DAG.getConstant(-1, CompareVT);
917 HWFalse = DAG.getConstant(0, CompareVT);
918 }
919 else {
920 assert(!"Unhandled value type in LowerSELECT_CC");
921 }
922
923 // Lower this unsupported SELECT_CC into a combination of two supported
924 // SELECT_CC operations.
925 SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
926
927 return DAG.getNode(ISD::SELECT_CC, DL, VT,
928 Cond, HWFalse,
929 True, False,
930 DAG.getCondCode(ISD::SETNE));
931}
932
933SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
934 return DAG.getNode(ISD::SELECT_CC,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000935 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +0000936 Op.getValueType(),
937 Op.getOperand(0),
938 DAG.getConstant(0, MVT::i32),
939 Op.getOperand(1),
940 Op.getOperand(2),
941 DAG.getCondCode(ISD::SETNE));
942}
943
/// LLVM generates byte-addressed pointers.  For indirect addressing, we need
/// to convert these pointers to a register index.  Each register holds
/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
/// for indirect addressing.
SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
                                               unsigned StackWidth,
                                               SelectionDAG &DAG) const {
  // A stack "row" spans StackWidth 32-bit sub-registers, so the byte address
  // is divided by (4 * StackWidth) via a right shift:
  //   StackWidth 1 ->  4 bytes/row -> shift by 2
  //   StackWidth 2 ->  8 bytes/row -> shift by 3
  //   StackWidth 4 -> 16 bytes/row -> shift by 4
  unsigned SRLPad;
  switch(StackWidth) {
  case 1:
    SRLPad = 2;
    break;
  case 2:
    SRLPad = 3;
    break;
  case 4:
    SRLPad = 4;
    break;
  default: llvm_unreachable("Invalid stack width");
  }

  return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
                     DAG.getConstant(SRLPad, MVT::i32));
}
969
/// Compute where element \p ElemIdx of a vector lives in the indirect-address
/// stack, given \p StackWidth (number of 32-bit sub-registers used per row).
///
/// \param[out] Channel  sub-register channel holding the element.
/// \param[out] PtrIncr  pointer *delta* to add before accessing the element.
///             Callers (LowerLOAD / LowerSTORE in this file) iterate elements
///             in order and add PtrIncr to a running pointer on each step, so
///             this is an increment relative to the previous element, not an
///             absolute row index.
void R600TargetLowering::getStackAddress(unsigned StackWidth,
                                         unsigned ElemIdx,
                                         unsigned &Channel,
                                         unsigned &PtrIncr) const {
  switch (StackWidth) {
  default:
  case 1:
    // One element per row: always channel 0, advance the row for every
    // element after the first.
    Channel = 0;
    if (ElemIdx > 0) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 2:
    // Two elements per row: advance the row exactly once, when crossing
    // from element 1 to element 2.
    Channel = ElemIdx % 2;
    if (ElemIdx == 2) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 4:
    // One row holds all four elements; never advance.
    Channel = ElemIdx;
    PtrIncr = 0;
    break;
  }
}
998
Tom Stellard75aadc22012-12-11 21:25:42 +0000999SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001000 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001001 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1002 SDValue Chain = Op.getOperand(0);
1003 SDValue Value = Op.getOperand(1);
1004 SDValue Ptr = Op.getOperand(2);
1005
Tom Stellard2ffc3302013-08-26 15:05:44 +00001006 SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
Tom Stellardfbab8272013-08-16 01:12:11 +00001007 if (Result.getNode()) {
1008 return Result;
1009 }
1010
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001011 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
1012 if (StoreNode->isTruncatingStore()) {
1013 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001014 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001015 EVT MemVT = StoreNode->getMemoryVT();
1016 SDValue MaskConstant;
1017 if (MemVT == MVT::i8) {
1018 MaskConstant = DAG.getConstant(0xFF, MVT::i32);
1019 } else {
1020 assert(MemVT == MVT::i16);
1021 MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
1022 }
1023 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
1024 DAG.getConstant(2, MVT::i32));
1025 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
1026 DAG.getConstant(0x00000003, VT));
1027 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1028 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
1029 DAG.getConstant(3, VT));
1030 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1031 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
1032 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1033 // vector instead.
1034 SDValue Src[4] = {
1035 ShiftedValue,
1036 DAG.getConstant(0, MVT::i32),
1037 DAG.getConstant(0, MVT::i32),
1038 Mask
1039 };
1040 SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src, 4);
1041 SDValue Args[3] = { Chain, Input, DWordAddr };
1042 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
1043 Op->getVTList(), Args, 3, MemVT,
1044 StoreNode->getMemOperand());
1045 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
1046 Value.getValueType().bitsGE(MVT::i32)) {
1047 // Convert pointer from byte address to dword address.
1048 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1049 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
1050 Ptr, DAG.getConstant(2, MVT::i32)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001051
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001052 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
1053 assert(!"Truncated and indexed stores not supported yet");
1054 } else {
1055 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1056 }
1057 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001058 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001059 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001060
1061 EVT ValueVT = Value.getValueType();
1062
1063 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1064 return SDValue();
1065 }
1066
1067 // Lowering for indirect addressing
1068
1069 const MachineFunction &MF = DAG.getMachineFunction();
1070 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1071 getTargetMachine().getFrameLowering());
1072 unsigned StackWidth = TFL->getStackWidth(MF);
1073
1074 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1075
1076 if (ValueVT.isVector()) {
1077 unsigned NumElemVT = ValueVT.getVectorNumElements();
1078 EVT ElemVT = ValueVT.getVectorElementType();
1079 SDValue Stores[4];
1080
1081 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1082 "vector width in load");
1083
1084 for (unsigned i = 0; i < NumElemVT; ++i) {
1085 unsigned Channel, PtrIncr;
1086 getStackAddress(StackWidth, i, Channel, PtrIncr);
1087 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1088 DAG.getConstant(PtrIncr, MVT::i32));
1089 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
1090 Value, DAG.getConstant(i, MVT::i32));
1091
1092 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1093 Chain, Elem, Ptr,
1094 DAG.getTargetConstant(Channel, MVT::i32));
1095 }
1096 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
1097 } else {
1098 if (ValueVT == MVT::i8) {
1099 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1100 }
1101 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
NAKAMURA Takumi18ca09c2013-05-22 06:37:25 +00001102 DAG.getTargetConstant(0, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001103 }
1104
1105 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001106}
1107
Tom Stellard365366f2013-01-23 02:09:06 +00001108// return (512 + (kc_bank << 12)
1109static int
1110ConstantAddressBlock(unsigned AddressSpace) {
1111 switch (AddressSpace) {
1112 case AMDGPUAS::CONSTANT_BUFFER_0:
1113 return 512;
1114 case AMDGPUAS::CONSTANT_BUFFER_1:
1115 return 512 + 4096;
1116 case AMDGPUAS::CONSTANT_BUFFER_2:
1117 return 512 + 4096 * 2;
1118 case AMDGPUAS::CONSTANT_BUFFER_3:
1119 return 512 + 4096 * 3;
1120 case AMDGPUAS::CONSTANT_BUFFER_4:
1121 return 512 + 4096 * 4;
1122 case AMDGPUAS::CONSTANT_BUFFER_5:
1123 return 512 + 4096 * 5;
1124 case AMDGPUAS::CONSTANT_BUFFER_6:
1125 return 512 + 4096 * 6;
1126 case AMDGPUAS::CONSTANT_BUFFER_7:
1127 return 512 + 4096 * 7;
1128 case AMDGPUAS::CONSTANT_BUFFER_8:
1129 return 512 + 4096 * 8;
1130 case AMDGPUAS::CONSTANT_BUFFER_9:
1131 return 512 + 4096 * 9;
1132 case AMDGPUAS::CONSTANT_BUFFER_10:
1133 return 512 + 4096 * 10;
1134 case AMDGPUAS::CONSTANT_BUFFER_11:
1135 return 512 + 4096 * 11;
1136 case AMDGPUAS::CONSTANT_BUFFER_12:
1137 return 512 + 4096 * 12;
1138 case AMDGPUAS::CONSTANT_BUFFER_13:
1139 return 512 + 4096 * 13;
1140 case AMDGPUAS::CONSTANT_BUFFER_14:
1141 return 512 + 4096 * 14;
1142 case AMDGPUAS::CONSTANT_BUFFER_15:
1143 return 512 + 4096 * 15;
1144 default:
1145 return -1;
1146 }
1147}
1148
1149SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
1150{
1151 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001152 SDLoc DL(Op);
Tom Stellard365366f2013-01-23 02:09:06 +00001153 LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
1154 SDValue Chain = Op.getOperand(0);
1155 SDValue Ptr = Op.getOperand(1);
1156 SDValue LoweredLoad;
1157
Tom Stellard35bb18c2013-08-26 15:06:04 +00001158 if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
1159 SDValue MergedValues[2] = {
1160 SplitVectorLoad(Op, DAG),
1161 Chain
1162 };
1163 return DAG.getMergeValues(MergedValues, 2, DL);
1164 }
1165
Tom Stellard365366f2013-01-23 02:09:06 +00001166 int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
1167 if (ConstantBlock > -1) {
1168 SDValue Result;
1169 if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
Vincent Lejeune743dca02013-03-05 15:04:29 +00001170 dyn_cast<Constant>(LoadNode->getSrcValue()) ||
1171 dyn_cast<ConstantSDNode>(Ptr)) {
Tom Stellard365366f2013-01-23 02:09:06 +00001172 SDValue Slots[4];
1173 for (unsigned i = 0; i < 4; i++) {
1174 // We want Const position encoded with the following formula :
1175 // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
1176 // const_index is Ptr computed by llvm using an alignment of 16.
1177 // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
1178 // then div by 4 at the ISel step
1179 SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
1180 DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
1181 Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
1182 }
Tom Stellard0344cdf2013-08-01 15:23:42 +00001183 EVT NewVT = MVT::v4i32;
1184 unsigned NumElements = 4;
1185 if (VT.isVector()) {
1186 NewVT = VT;
1187 NumElements = VT.getVectorNumElements();
1188 }
1189 Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT, Slots, NumElements);
Tom Stellard365366f2013-01-23 02:09:06 +00001190 } else {
1191 // non constant ptr cant be folded, keeps it as a v4f32 load
1192 Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
Vincent Lejeune743dca02013-03-05 15:04:29 +00001193 DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
Christian Konig189357c2013-03-07 09:03:59 +00001194 DAG.getConstant(LoadNode->getAddressSpace() -
NAKAMURA Takumi18ca09c2013-05-22 06:37:25 +00001195 AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
Tom Stellard365366f2013-01-23 02:09:06 +00001196 );
1197 }
1198
1199 if (!VT.isVector()) {
1200 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
1201 DAG.getConstant(0, MVT::i32));
1202 }
1203
1204 SDValue MergedValues[2] = {
1205 Result,
1206 Chain
1207 };
1208 return DAG.getMergeValues(MergedValues, 2, DL);
1209 }
1210
Tom Stellard84021442013-07-23 01:48:24 +00001211 // For most operations returning SDValue() will result int he node being
1212 // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so
1213 // we need to manually expand loads that may be legal in some address spaces
1214 // and illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported
1215 // for compute shaders, since the data is sign extended when it is uploaded
1216 // to the buffer. Howerver SEXT loads from other addresspaces are not
1217 // supported, so we need to expand them here.
1218 if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
1219 EVT MemVT = LoadNode->getMemoryVT();
1220 assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
1221 SDValue ShiftAmount =
1222 DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
1223 SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
1224 LoadNode->getPointerInfo(), MemVT,
1225 LoadNode->isVolatile(),
1226 LoadNode->isNonTemporal(),
1227 LoadNode->getAlignment());
1228 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
1229 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);
1230
1231 SDValue MergedValues[2] = { Sra, Chain };
1232 return DAG.getMergeValues(MergedValues, 2, DL);
1233 }
1234
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001235 if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1236 return SDValue();
1237 }
1238
1239 // Lowering for indirect addressing
1240 const MachineFunction &MF = DAG.getMachineFunction();
1241 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1242 getTargetMachine().getFrameLowering());
1243 unsigned StackWidth = TFL->getStackWidth(MF);
1244
1245 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1246
1247 if (VT.isVector()) {
1248 unsigned NumElemVT = VT.getVectorNumElements();
1249 EVT ElemVT = VT.getVectorElementType();
1250 SDValue Loads[4];
1251
1252 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1253 "vector width in load");
1254
1255 for (unsigned i = 0; i < NumElemVT; ++i) {
1256 unsigned Channel, PtrIncr;
1257 getStackAddress(StackWidth, i, Channel, PtrIncr);
1258 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1259 DAG.getConstant(PtrIncr, MVT::i32));
1260 Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
1261 Chain, Ptr,
1262 DAG.getTargetConstant(Channel, MVT::i32),
1263 Op.getOperand(2));
1264 }
1265 for (unsigned i = NumElemVT; i < 4; ++i) {
1266 Loads[i] = DAG.getUNDEF(ElemVT);
1267 }
1268 EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
1269 LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
1270 } else {
1271 LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
1272 Chain, Ptr,
1273 DAG.getTargetConstant(0, MVT::i32), // Channel
1274 Op.getOperand(2));
1275 }
1276
1277 SDValue Ops[2];
1278 Ops[0] = LoweredLoad;
1279 Ops[1] = Chain;
1280
1281 return DAG.getMergeValues(Ops, 2, DL);
Tom Stellard365366f2013-01-23 02:09:06 +00001282}
Tom Stellard75aadc22012-12-11 21:25:42 +00001283
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      SDLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());

  AnalyzeFormalArguments(CCInfo, Ins);

  // Each formal argument is materialized as a load from constant buffer 0
  // at the offset the calling convention assigned to it.
  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    EVT VT = VA.getLocVT();

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.
    SDValue Arg = DAG.getLoad(VT, DL, Chain,
                           DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
                           MachinePointerInfo(UndefValue::get(PtrTy)), false,
                           false, false, 4); // 4 is the preferred alignment for
                                             // the CONSTANT memory space.
    InVals.push_back(Arg);
  }
  return Chain;
}
1318
Matt Arsenault758659232013-05-18 00:21:46 +00001319EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Tom Stellard75aadc22012-12-11 21:25:42 +00001320 if (!VT.isVector()) return MVT::i32;
1321 return VT.changeVectorElementTypeToInteger();
1322}
1323
/// Compact a 4-lane BUILD_VECTOR: lanes holding constant 0.0 / 1.0 are
/// replaced by the hardware SEL_0 / SEL_1 swizzle selects, and lanes that
/// duplicate an earlier lane are redirected to that lane.  Changed lanes are
/// recorded in \p RemapSwizzle (old lane -> new select) and their slots
/// become undef.
static SDValue
CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
                        DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };

  for (unsigned i = 0; i < 4; i++) {
    // Constant 0.0 / 1.0 lanes can be sourced from the swizzle hardware
    // directly; free the lane by turning it into undef.
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      continue;
    // Deduplicate: if this lane repeats an earlier one, point its swizzle at
    // the first occurrence and free this slot.
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec, 4);
}
1361
/// Reorder the lanes of a 4-lane BUILD_VECTOR so that each lane built from an
/// EXTRACT_VECTOR_ELT sits at the index it extracts, letting the swizzle be
/// the identity where possible.  \p RemapSwizzle is filled with the resulting
/// old lane -> new lane mapping (it starts out as the identity).
static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
                                DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };
  // Once a lane has been claimed by an extract of that index, it must not be
  // displaced by a later swap.
  bool isUnmovable[4] = { false, false, false, false };
  for (unsigned i = 0; i < 4; i++)
    RemapSwizzle[i] = i;

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
          ->getZExtValue();
      if (!isUnmovable[Idx]) {
        // Swap i and Idx
        std::swap(NewBldVec[Idx], NewBldVec[i]);
        // Keep the remap table consistent with the value swap; the double
        // indirection follows values that were already moved earlier.
        std::swap(RemapSwizzle[RemapSwizzle[Idx]], RemapSwizzle[RemapSwizzle[i]]);
      }
      isUnmovable[Idx] = true;
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec, 4);
}
1392
1393
1394SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1395SDValue Swz[4], SelectionDAG &DAG) const {
1396 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1397 // Old -> New swizzle values
1398 DenseMap<unsigned, unsigned> SwizzleRemap;
1399
1400 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1401 for (unsigned i = 0; i < 4; i++) {
1402 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1403 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1404 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1405 }
1406
1407 SwizzleRemap.clear();
1408 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1409 for (unsigned i = 0; i < 4; i++) {
1410 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1411 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1412 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1413 }
1414
1415 return BuildVector;
1416}
1417
1418
Tom Stellard75aadc22012-12-11 21:25:42 +00001419//===----------------------------------------------------------------------===//
1420// Custom DAG Optimizations
1421//===----------------------------------------------------------------------===//
1422
1423SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1424 DAGCombinerInfo &DCI) const {
1425 SelectionDAG &DAG = DCI.DAG;
1426
1427 switch (N->getOpcode()) {
1428 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1429 case ISD::FP_ROUND: {
1430 SDValue Arg = N->getOperand(0);
1431 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001432 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001433 Arg.getOperand(0));
1434 }
1435 break;
1436 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001437
1438 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1439 // (i32 select_cc f32, f32, -1, 0 cc)
1440 //
1441 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1442 // this to one of the SET*_DX10 instructions.
1443 case ISD::FP_TO_SINT: {
1444 SDValue FNeg = N->getOperand(0);
1445 if (FNeg.getOpcode() != ISD::FNEG) {
1446 return SDValue();
1447 }
1448 SDValue SelectCC = FNeg.getOperand(0);
1449 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1450 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1451 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1452 !isHWTrueValue(SelectCC.getOperand(2)) ||
1453 !isHWFalseValue(SelectCC.getOperand(3))) {
1454 return SDValue();
1455 }
1456
Andrew Trickef9de2a2013-05-25 02:42:55 +00001457 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001458 SelectCC.getOperand(0), // LHS
1459 SelectCC.getOperand(1), // RHS
1460 DAG.getConstant(-1, MVT::i32), // True
1461 DAG.getConstant(0, MVT::i32), // Flase
1462 SelectCC.getOperand(4)); // CC
1463
1464 break;
1465 }
Quentin Colombete2e05482013-07-30 00:27:16 +00001466
1467 // insert_vector_elt (build_vector elt0, …, eltN), NewEltIdx, idx
1468 // => build_vector elt0, …, NewEltIdx, …, eltN
1469 case ISD::INSERT_VECTOR_ELT: {
1470 SDValue InVec = N->getOperand(0);
1471 SDValue InVal = N->getOperand(1);
1472 SDValue EltNo = N->getOperand(2);
1473 SDLoc dl(N);
1474
1475 // If the inserted element is an UNDEF, just use the input vector.
1476 if (InVal.getOpcode() == ISD::UNDEF)
1477 return InVec;
1478
1479 EVT VT = InVec.getValueType();
1480
1481 // If we can't generate a legal BUILD_VECTOR, exit
1482 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1483 return SDValue();
1484
1485 // Check that we know which element is being inserted
1486 if (!isa<ConstantSDNode>(EltNo))
1487 return SDValue();
1488 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1489
1490 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1491 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1492 // vector elements.
1493 SmallVector<SDValue, 8> Ops;
1494 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1495 Ops.append(InVec.getNode()->op_begin(),
1496 InVec.getNode()->op_end());
1497 } else if (InVec.getOpcode() == ISD::UNDEF) {
1498 unsigned NElts = VT.getVectorNumElements();
1499 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1500 } else {
1501 return SDValue();
1502 }
1503
1504 // Insert the element
1505 if (Elt < Ops.size()) {
1506 // All the operands of BUILD_VECTOR must have the same type;
1507 // we enforce that here.
1508 EVT OpVT = Ops[0].getValueType();
1509 if (InVal.getValueType() != OpVT)
1510 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1511 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
1512 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
1513 Ops[Elt] = InVal;
1514 }
1515
1516 // Return the new vector
1517 return DAG.getNode(ISD::BUILD_VECTOR, dl,
1518 VT, &Ops[0], Ops.size());
1519 }
1520
Tom Stellard365366f2013-01-23 02:09:06 +00001521 // Extract_vec (Build_vector) generated by custom lowering
1522 // also needs to be customly combined
1523 case ISD::EXTRACT_VECTOR_ELT: {
1524 SDValue Arg = N->getOperand(0);
1525 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1526 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1527 unsigned Element = Const->getZExtValue();
1528 return Arg->getOperand(Element);
1529 }
1530 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001531 if (Arg.getOpcode() == ISD::BITCAST &&
1532 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1533 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1534 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001535 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00001536 Arg->getOperand(0).getOperand(Element));
1537 }
1538 }
Tom Stellard365366f2013-01-23 02:09:06 +00001539 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001540
1541 case ISD::SELECT_CC: {
1542 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1543 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001544 //
1545 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1546 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001547 SDValue LHS = N->getOperand(0);
1548 if (LHS.getOpcode() != ISD::SELECT_CC) {
1549 return SDValue();
1550 }
1551
1552 SDValue RHS = N->getOperand(1);
1553 SDValue True = N->getOperand(2);
1554 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001555 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001556
1557 if (LHS.getOperand(2).getNode() != True.getNode() ||
1558 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00001559 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00001560 return SDValue();
1561 }
1562
Tom Stellard5e524892013-03-08 15:37:11 +00001563 switch (NCC) {
1564 default: return SDValue();
1565 case ISD::SETNE: return LHS;
1566 case ISD::SETEQ: {
1567 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1568 LHSCC = ISD::getSetCCInverse(LHSCC,
1569 LHS.getOperand(0).getValueType().isInteger());
Andrew Trickef9de2a2013-05-25 02:42:55 +00001570 return DAG.getSelectCC(SDLoc(N),
Tom Stellard5e524892013-03-08 15:37:11 +00001571 LHS.getOperand(0),
1572 LHS.getOperand(1),
1573 LHS.getOperand(2),
1574 LHS.getOperand(3),
1575 LHSCC);
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001576 }
Tom Stellard5e524892013-03-08 15:37:11 +00001577 }
1578 }
Tom Stellardfbab8272013-08-16 01:12:11 +00001579
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001580 case AMDGPUISD::EXPORT: {
1581 SDValue Arg = N->getOperand(1);
1582 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1583 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001584
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001585 SDValue NewArgs[8] = {
1586 N->getOperand(0), // Chain
1587 SDValue(),
1588 N->getOperand(2), // ArrayBase
1589 N->getOperand(3), // Type
1590 N->getOperand(4), // SWZ_X
1591 N->getOperand(5), // SWZ_Y
1592 N->getOperand(6), // SWZ_Z
1593 N->getOperand(7) // SWZ_W
1594 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00001595 SDLoc DL(N);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001596 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001597 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
Tom Stellarde06163a2013-02-07 14:02:35 +00001598 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001599 case AMDGPUISD::TEXTURE_FETCH: {
1600 SDValue Arg = N->getOperand(1);
1601 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1602 break;
1603
1604 SDValue NewArgs[19] = {
1605 N->getOperand(0),
1606 N->getOperand(1),
1607 N->getOperand(2),
1608 N->getOperand(3),
1609 N->getOperand(4),
1610 N->getOperand(5),
1611 N->getOperand(6),
1612 N->getOperand(7),
1613 N->getOperand(8),
1614 N->getOperand(9),
1615 N->getOperand(10),
1616 N->getOperand(11),
1617 N->getOperand(12),
1618 N->getOperand(13),
1619 N->getOperand(14),
1620 N->getOperand(15),
1621 N->getOperand(16),
1622 N->getOperand(17),
1623 N->getOperand(18),
1624 };
1625 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
1626 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
1627 NewArgs, 19);
1628 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001629 }
1630 return SDValue();
1631}