blob: a2bc2c3a9fa8fe0061ecd79de0629e3f3ec62d49 [file] [log] [blame]
Tom Stellard75aadc22012-12-11 21:25:42 +00001//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Custom DAG lowering for R600
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
16#include "R600Defines.h"
17#include "R600InstrInfo.h"
18#include "R600MachineFunctionInfo.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000019#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000020#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000021#include "llvm/CodeGen/MachineInstrBuilder.h"
22#include "llvm/CodeGen/MachineRegisterInfo.h"
23#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000024#include "llvm/IR/Argument.h"
25#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000026
27using namespace llvm;
28
29R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000030 AMDGPUTargetLowering(TM),
31 Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000032 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
33 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
34 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
35 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
36 computeRegisterProperties();
37
38 setOperationAction(ISD::FADD, MVT::v4f32, Expand);
39 setOperationAction(ISD::FMUL, MVT::v4f32, Expand);
40 setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
41 setOperationAction(ISD::FSUB, MVT::v4f32, Expand);
42
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000043 setOperationAction(ISD::FCOS, MVT::f32, Custom);
44 setOperationAction(ISD::FSIN, MVT::f32, Custom);
45
Tom Stellarda8b03512012-12-21 16:33:24 +000046 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
47 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
48 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
49 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000050 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
51
Tom Stellard492ebea2013-03-08 15:37:07 +000052 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
53 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000054
55 setOperationAction(ISD::FSUB, MVT::f32, Expand);
56
57 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
58 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
59 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000060
Tom Stellard75aadc22012-12-11 21:25:42 +000061 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
62 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
63
Tom Stellarde8f9f282013-03-08 15:37:05 +000064 setOperationAction(ISD::SETCC, MVT::i32, Expand);
65 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000066 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
67
68 setOperationAction(ISD::SELECT, MVT::i32, Custom);
69 setOperationAction(ISD::SELECT, MVT::f32, Custom);
70
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000071 // Legalize loads and stores to the private address space.
72 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard6ec9e802013-06-20 21:55:23 +000073 setOperationAction(ISD::LOAD, MVT::v2i32, Expand);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000074 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellard1e803092013-07-23 01:48:18 +000075 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
76 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
77 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
78 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000079 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000080 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard6ec9e802013-06-20 21:55:23 +000081 setOperationAction(ISD::STORE, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000082 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
83
Tom Stellard365366f2013-01-23 02:09:06 +000084 setOperationAction(ISD::LOAD, MVT::i32, Custom);
85 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000086 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
87
Tom Stellard75aadc22012-12-11 21:25:42 +000088 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +000089 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +000090 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +000091 setTargetDAGCombine(ISD::SELECT_CC);
Tom Stellard75aadc22012-12-11 21:25:42 +000092
Michel Danzer49812b52013-07-10 16:37:07 +000093 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
94
Tom Stellardb852af52013-03-08 15:37:03 +000095 setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard87047f62013-04-24 23:56:18 +000096 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard75aadc22012-12-11 21:25:42 +000097 setSchedulingPreference(Sched::VLIW);
98}
99
/// Expand pseudo instructions that were marked usesCustomInserter.
///
/// Each handled opcode is rewritten into real R600 machine instructions at
/// the pseudo's position; on fall-through the pseudo itself is erased.  Two
/// cases return early WITHOUT erasing MI: an ExportSwz that is neither the
/// last export of its type nor end-of-program (left unmodified), and RETURN
/// (only annotated with implicit uses).  Unhandled opcodes defer to the
/// AMDGPU base class.
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
          static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());

  switch (MI->getOpcode()) {
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);

  // CLAMP/FABS/FNEG become a plain MOV carrying the corresponding
  // instruction-flag bit, which the R600 ALU applies for free.
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                   AMDGPU::MOV,
                                                   MI->getOperand(0).getReg(),
                                                   MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  // MASK_WRITE does not itself produce code: it marks the instruction that
  // defines its operand with the MASK flag so the write is suppressed.
  case AMDGPU::MASK_WRITE: {
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  // LDS reads return their value in the physical OQAP queue register;
  // rebuild the instruction to define OQAP, then copy OQAP into the
  // original virtual destination.
  case AMDGPU::LDS_READ_RET: {
    MachineInstrBuilder NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                                        TII->get(MI->getOpcode()),
                                        AMDGPU::OQAP);
    for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
      NewMI.addOperand(MI->getOperand(i));
    }
    TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV,
                                 MI->getOperand(0).getReg(),
                                 AMDGPU::OQAP);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;

  // Constant-buffer copy: a MOV from the ALU_CONST pseudo-register, with the
  // constant's address encoded in the src0_sel operand.
  case AMDGPU::CONST_COPY: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
        MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    // If the next instruction is the function return, fold the
    // end-of-program bit into this export-style write.
    unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  // Textured sample with explicit derivatives: emit SET_GRADIENTS_H/V for
  // the two derivative vectors, then the gradient-using sample which
  // implicitly reads both.  The TextureId switch remaps source swizzles and
  // coordinate-type bits for the non-default sampler dimensionalities.
  case AMDGPU::TXD: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  // Same expansion as TXD but emits the comparison (shadow) sampler variant
  // TEX_SAMPLE_C_G for the final fetch.
  case AMDGPU::TXD_SHADOW: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0));
    break;

  // Conditional branches expand to PRED_X (sets PREDICATE_BIT, flagged PUSH
  // for the stack-based control flow) followed by a predicated JUMP_COND.
  case AMDGPU::BRANCH_COND_f32: {
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
            AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO_INT)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
           .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if its not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    // Scan forward for a later export with the same export type.
    for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = llvm::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // CF instruction encoding differs between Evergreen (84) and R600 (40).
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  MI->eraseFromParent();
  return BB;
}
470
471//===----------------------------------------------------------------------===//
472// Custom DAG Lowering Operations
473//===----------------------------------------------------------------------===//
474
/// Custom-lower the DAG nodes this target marked Custom in the constructor.
///
/// Simple cases dispatch to dedicated Lower* helpers; the two intrinsic
/// opcodes are handled inline by switching on the intrinsic ID.  Returning
/// an empty SDValue() tells the legalizer the node needs no replacement.
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::FCOS:
  case ISD::FSIN: return LowerTrig(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::SELECT: return LowerSELECT(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: return LowerLOAD(Op, DAG);
  case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    // Write a shader output register and remember it as a live-out so
    // RETURN can be given the implicit use (see EmitInstrWithCustomInserter).
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MFI->LiveOuts.push_back(Reg);
      return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
    }
    // Export with the identity XYZW swizzle.
    case AMDGPUIntrinsic::R600_store_swizzle: {
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, MVT::i32), // SWZ_X
        DAG.getConstant(1, MVT::i32), // SWZ_Y
        DAG.getConstant(2, MVT::i32), // SWZ_Z
        DAG.getConstant(3, MVT::i32) // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(),
          Args, 8);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch(IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    // Shader input: mark the T-register live-in and read it at function entry.
    case AMDGPUIntrinsic::R600_load_input: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      MRI.addLiveIn(Reg);
      return DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), Reg, VT);
    }

    case AMDGPUIntrinsic::R600_interp_input: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
      MachineSDNode *interp;
      // Negative i/j-barycentric index: constant (flat) interpolation; load
      // the vector and extract the channel selected by slot % 4.
      if (ijb < 0) {
        const MachineFunction &MF = DAG.getMachineFunction();
        const R600InstrInfo *TII =
          static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
        interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
            MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
        return DAG.getTargetExtractSubreg(
            TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
            DL, MVT::f32, SDValue(interp, 0));
      }

      // Otherwise read the I and J barycentrics from their fixed register
      // pair and emit the two-channel interpolation instruction.
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
      unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
      MRI.addLiveIn(RegisterI);
      MRI.addLiveIn(RegisterJ);
      SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
      SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);

      // INTERP_PAIR_XY produces channels X/Y, _ZW produces Z/W; pick the
      // result number by slot parity.
      if (slot % 4 < 2)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            RegisterJNode, RegisterINode);
      return SDValue(interp, slot % 2);
    }
    // All texture intrinsics funnel into a single TEXTURE_FETCH node whose
    // first operand encodes which hardware texture op to use.
    case AMDGPUIntrinsic::R600_tex:
    case AMDGPUIntrinsic::R600_texc:
    case AMDGPUIntrinsic::R600_txl:
    case AMDGPUIntrinsic::R600_txlc:
    case AMDGPUIntrinsic::R600_txb:
    case AMDGPUIntrinsic::R600_txbc:
    case AMDGPUIntrinsic::R600_txf:
    case AMDGPUIntrinsic::R600_txq:
    case AMDGPUIntrinsic::R600_ddx:
    case AMDGPUIntrinsic::R600_ddy: {
      unsigned TextureOp;
      switch (IntrinsicID) {
      case AMDGPUIntrinsic::R600_tex:
        TextureOp = 0;
        break;
      case AMDGPUIntrinsic::R600_texc:
        TextureOp = 1;
        break;
      case AMDGPUIntrinsic::R600_txl:
        TextureOp = 2;
        break;
      case AMDGPUIntrinsic::R600_txlc:
        TextureOp = 3;
        break;
      case AMDGPUIntrinsic::R600_txb:
        TextureOp = 4;
        break;
      case AMDGPUIntrinsic::R600_txbc:
        TextureOp = 5;
        break;
      case AMDGPUIntrinsic::R600_txf:
        TextureOp = 6;
        break;
      case AMDGPUIntrinsic::R600_txq:
        TextureOp = 7;
        break;
      case AMDGPUIntrinsic::R600_ddx:
        TextureOp = 8;
        break;
      case AMDGPUIntrinsic::R600_ddy:
        TextureOp = 9;
        break;
      default:
        llvm_unreachable("Unknow Texture Operation");
      }

      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
    }
    // dp4: scalarize both v4f32 operands into the 8 inputs of the DOT4 node.
    case AMDGPUIntrinsic::AMDGPU_dp4: {
      SDValue Args[8] = {
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(0, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(0, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(1, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(1, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(2, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(2, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(3, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(3, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8);
    }

    // Grid/group-size queries are loads from fixed dword slots of the
    // implicit-parameter constant buffer; see LowerImplicitParameter.
    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    // Group and thread IDs live in fixed channels of T1 and T0.
    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}
708
709void R600TargetLowering::ReplaceNodeResults(SDNode *N,
710 SmallVectorImpl<SDValue> &Results,
711 SelectionDAG &DAG) const {
712 switch (N->getOpcode()) {
713 default: return;
714 case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
Tom Stellard365366f2013-01-23 02:09:06 +0000715 return;
716 case ISD::LOAD: {
717 SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
718 Results.push_back(SDValue(Node, 0));
719 Results.push_back(SDValue(Node, 1));
720 // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
721 // function
722 DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
723 return;
724 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000725 case ISD::STORE:
726 SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
727 Results.push_back(SDValue(Node, 0));
728 return;
Tom Stellard75aadc22012-12-11 21:25:42 +0000729 }
730}
731
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000732SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
733 // On hw >= R700, COS/SIN input must be between -1. and 1.
734 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
735 EVT VT = Op.getValueType();
736 SDValue Arg = Op.getOperand(0);
737 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
738 DAG.getNode(ISD::FADD, SDLoc(Op), VT,
739 DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
740 DAG.getConstantFP(0.15915494309, MVT::f32)),
741 DAG.getConstantFP(0.5, MVT::f32)));
742 unsigned TrigNode;
743 switch (Op.getOpcode()) {
744 case ISD::FCOS:
745 TrigNode = AMDGPUISD::COS_HW;
746 break;
747 case ISD::FSIN:
748 TrigNode = AMDGPUISD::SIN_HW;
749 break;
750 default:
751 llvm_unreachable("Wrong trig opcode");
752 }
753 SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
754 DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
755 DAG.getConstantFP(-0.5, MVT::f32)));
756 if (Gen >= AMDGPUSubtarget::R700)
757 return TrigVal;
758 // On R600 hw, COS/SIN input must be between -Pi and Pi.
759 return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
760 DAG.getConstantFP(3.14159265359, MVT::f32));
761}
762
Tom Stellard75aadc22012-12-11 21:25:42 +0000763SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
764 return DAG.getNode(
765 ISD::SETCC,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000766 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +0000767 MVT::i1,
768 Op, DAG.getConstantFP(0.0f, MVT::f32),
769 DAG.getCondCode(ISD::SETNE)
770 );
771}
772
Tom Stellard75aadc22012-12-11 21:25:42 +0000773SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000774 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +0000775 unsigned DwordOffset) const {
776 unsigned ByteOffset = DwordOffset * 4;
777 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +0000778 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +0000779
780 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
781 assert(isInt<16>(ByteOffset));
782
783 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
784 DAG.getConstant(ByteOffset, MVT::i32), // PTR
785 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
786 false, false, false, 0);
787}
788
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000789SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
790
791 MachineFunction &MF = DAG.getMachineFunction();
792 const AMDGPUFrameLowering *TFL =
793 static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
794
795 FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
796 assert(FIN);
797
798 unsigned FrameIndex = FIN->getIndex();
799 unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
800 return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
801}
802
Tom Stellard75aadc22012-12-11 21:25:42 +0000803bool R600TargetLowering::isZero(SDValue Op) const {
804 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
805 return Cst->isNullValue();
806 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
807 return CstFP->isZero();
808 } else {
809 return false;
810 }
811}
812
813SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +0000814 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000815 EVT VT = Op.getValueType();
816
817 SDValue LHS = Op.getOperand(0);
818 SDValue RHS = Op.getOperand(1);
819 SDValue True = Op.getOperand(2);
820 SDValue False = Op.getOperand(3);
821 SDValue CC = Op.getOperand(4);
822 SDValue Temp;
823
824 // LHS and RHS are guaranteed to be the same value type
825 EVT CompareVT = LHS.getValueType();
826
827 // Check if we can lower this to a native operation.
828
Tom Stellard2add82d2013-03-08 15:37:09 +0000829 // Try to lower to a SET* instruction:
830 //
831 // SET* can match the following patterns:
832 //
833 // select_cc f32, f32, -1, 0, cc_any
834 // select_cc f32, f32, 1.0f, 0.0f, cc_any
835 // select_cc i32, i32, -1, 0, cc_any
836 //
837
838 // Move hardware True/False values to the correct operand.
839 if (isHWTrueValue(False) && isHWFalseValue(True)) {
840 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
841 std::swap(False, True);
842 CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
843 }
844
845 if (isHWTrueValue(True) && isHWFalseValue(False) &&
846 (CompareVT == VT || VT == MVT::i32)) {
847 // This can be matched by a SET* instruction.
848 return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
849 }
850
Tom Stellard75aadc22012-12-11 21:25:42 +0000851 // Try to lower to a CND* instruction:
Tom Stellard2add82d2013-03-08 15:37:09 +0000852 //
853 // CND* can match the following patterns:
854 //
855 // select_cc f32, 0.0, f32, f32, cc_any
856 // select_cc f32, 0.0, i32, i32, cc_any
857 // select_cc i32, 0, f32, f32, cc_any
858 // select_cc i32, 0, i32, i32, cc_any
859 //
Tom Stellard75aadc22012-12-11 21:25:42 +0000860 if (isZero(LHS) || isZero(RHS)) {
861 SDValue Cond = (isZero(LHS) ? RHS : LHS);
862 SDValue Zero = (isZero(LHS) ? LHS : RHS);
863 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
864 if (CompareVT != VT) {
865 // Bitcast True / False to the correct types. This will end up being
866 // a nop, but it allows us to define only a single pattern in the
867 // .TD files for each CND* instruction rather than having to have
868 // one pattern for integer True/False and one for fp True/False
869 True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
870 False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
871 }
872 if (isZero(LHS)) {
873 CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
874 }
875
876 switch (CCOpcode) {
877 case ISD::SETONE:
878 case ISD::SETUNE:
879 case ISD::SETNE:
880 case ISD::SETULE:
881 case ISD::SETULT:
882 case ISD::SETOLE:
883 case ISD::SETOLT:
884 case ISD::SETLE:
885 case ISD::SETLT:
886 CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
887 Temp = True;
888 True = False;
889 False = Temp;
890 break;
891 default:
892 break;
893 }
894 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
895 Cond, Zero,
896 True, False,
897 DAG.getCondCode(CCOpcode));
898 return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
899 }
900
Tom Stellard75aadc22012-12-11 21:25:42 +0000901
902 // Possible Min/Max pattern
903 SDValue MinMax = LowerMinMax(Op, DAG);
904 if (MinMax.getNode()) {
905 return MinMax;
906 }
907
908 // If we make it this for it means we have no native instructions to handle
909 // this SELECT_CC, so we must lower it.
910 SDValue HWTrue, HWFalse;
911
912 if (CompareVT == MVT::f32) {
913 HWTrue = DAG.getConstantFP(1.0f, CompareVT);
914 HWFalse = DAG.getConstantFP(0.0f, CompareVT);
915 } else if (CompareVT == MVT::i32) {
916 HWTrue = DAG.getConstant(-1, CompareVT);
917 HWFalse = DAG.getConstant(0, CompareVT);
918 }
919 else {
920 assert(!"Unhandled value type in LowerSELECT_CC");
921 }
922
923 // Lower this unsupported SELECT_CC into a combination of two supported
924 // SELECT_CC operations.
925 SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
926
927 return DAG.getNode(ISD::SELECT_CC, DL, VT,
928 Cond, HWFalse,
929 True, False,
930 DAG.getCondCode(ISD::SETNE));
931}
932
933SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
934 return DAG.getNode(ISD::SELECT_CC,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000935 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +0000936 Op.getValueType(),
937 Op.getOperand(0),
938 DAG.getConstant(0, MVT::i32),
939 Op.getOperand(1),
940 Op.getOperand(2),
941 DAG.getCondCode(ISD::SETNE));
942}
943
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000944/// LLVM generates byte-addresed pointers. For indirect addressing, we need to
945/// convert these pointers to a register index. Each register holds
946/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
947/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
948/// for indirect addressing.
949SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
950 unsigned StackWidth,
951 SelectionDAG &DAG) const {
952 unsigned SRLPad;
953 switch(StackWidth) {
954 case 1:
955 SRLPad = 2;
956 break;
957 case 2:
958 SRLPad = 3;
959 break;
960 case 4:
961 SRLPad = 4;
962 break;
963 default: llvm_unreachable("Invalid stack width");
964 }
965
Andrew Trickef9de2a2013-05-25 02:42:55 +0000966 return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000967 DAG.getConstant(SRLPad, MVT::i32));
968}
969
/// Compute which sub-register channel holds element \p ElemIdx of a
/// private-memory vector access, and how far the register index must advance,
/// for a stack configured \p StackWidth sub-registers wide.
///
/// \param[out] Channel sub-register (0 = x .. 3 = w) holding the element.
/// \param[out] PtrIncr amount to ADD to the register index before accessing
///             this element.  Callers (LowerLOAD/LowerSTORE) accumulate
///             PtrIncr into the pointer on every iteration, so a value of 1
///             for one element keeps the pointer advanced for all later
///             elements as well.
void R600TargetLowering::getStackAddress(unsigned StackWidth,
                                         unsigned ElemIdx,
                                         unsigned &Channel,
                                         unsigned &PtrIncr) const {
  switch (StackWidth) {
  default:
  case 1:
    // One channel per slot: every element after the first lives at the next
    // register index.
    Channel = 0;
    if (ElemIdx > 0) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 2:
    // Two channels per slot: elements 0/1 share one register, 2/3 the next.
    Channel = ElemIdx % 2;
    // Advance only when first crossing into the second register (ElemIdx==2);
    // the caller's accumulation keeps the pointer advanced for element 3 too.
    if (ElemIdx == 2) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 4:
    // Four channels per slot: the whole vector fits in one register.
    Channel = ElemIdx;
    PtrIncr = 0;
    break;
  }
}
998
Tom Stellard75aadc22012-12-11 21:25:42 +0000999SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001000 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001001 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1002 SDValue Chain = Op.getOperand(0);
1003 SDValue Value = Op.getOperand(1);
1004 SDValue Ptr = Op.getOperand(2);
1005
1006 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
1007 Ptr->getOpcode() != AMDGPUISD::DWORDADDR) {
1008 // Convert pointer from byte address to dword address.
1009 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1010 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
1011 Ptr, DAG.getConstant(2, MVT::i32)));
1012
1013 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
1014 assert(!"Truncated and indexed stores not supported yet");
1015 } else {
1016 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1017 }
1018 return Chain;
1019 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001020
1021 EVT ValueVT = Value.getValueType();
1022
1023 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1024 return SDValue();
1025 }
1026
1027 // Lowering for indirect addressing
1028
1029 const MachineFunction &MF = DAG.getMachineFunction();
1030 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1031 getTargetMachine().getFrameLowering());
1032 unsigned StackWidth = TFL->getStackWidth(MF);
1033
1034 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1035
1036 if (ValueVT.isVector()) {
1037 unsigned NumElemVT = ValueVT.getVectorNumElements();
1038 EVT ElemVT = ValueVT.getVectorElementType();
1039 SDValue Stores[4];
1040
1041 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1042 "vector width in load");
1043
1044 for (unsigned i = 0; i < NumElemVT; ++i) {
1045 unsigned Channel, PtrIncr;
1046 getStackAddress(StackWidth, i, Channel, PtrIncr);
1047 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1048 DAG.getConstant(PtrIncr, MVT::i32));
1049 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
1050 Value, DAG.getConstant(i, MVT::i32));
1051
1052 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1053 Chain, Elem, Ptr,
1054 DAG.getTargetConstant(Channel, MVT::i32));
1055 }
1056 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
1057 } else {
1058 if (ValueVT == MVT::i8) {
1059 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1060 }
1061 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
NAKAMURA Takumi18ca09c2013-05-22 06:37:25 +00001062 DAG.getTargetConstant(0, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001063 }
1064
1065 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001066}
1067
Tom Stellard365366f2013-01-23 02:09:06 +00001068// return (512 + (kc_bank << 12)
1069static int
1070ConstantAddressBlock(unsigned AddressSpace) {
1071 switch (AddressSpace) {
1072 case AMDGPUAS::CONSTANT_BUFFER_0:
1073 return 512;
1074 case AMDGPUAS::CONSTANT_BUFFER_1:
1075 return 512 + 4096;
1076 case AMDGPUAS::CONSTANT_BUFFER_2:
1077 return 512 + 4096 * 2;
1078 case AMDGPUAS::CONSTANT_BUFFER_3:
1079 return 512 + 4096 * 3;
1080 case AMDGPUAS::CONSTANT_BUFFER_4:
1081 return 512 + 4096 * 4;
1082 case AMDGPUAS::CONSTANT_BUFFER_5:
1083 return 512 + 4096 * 5;
1084 case AMDGPUAS::CONSTANT_BUFFER_6:
1085 return 512 + 4096 * 6;
1086 case AMDGPUAS::CONSTANT_BUFFER_7:
1087 return 512 + 4096 * 7;
1088 case AMDGPUAS::CONSTANT_BUFFER_8:
1089 return 512 + 4096 * 8;
1090 case AMDGPUAS::CONSTANT_BUFFER_9:
1091 return 512 + 4096 * 9;
1092 case AMDGPUAS::CONSTANT_BUFFER_10:
1093 return 512 + 4096 * 10;
1094 case AMDGPUAS::CONSTANT_BUFFER_11:
1095 return 512 + 4096 * 11;
1096 case AMDGPUAS::CONSTANT_BUFFER_12:
1097 return 512 + 4096 * 12;
1098 case AMDGPUAS::CONSTANT_BUFFER_13:
1099 return 512 + 4096 * 13;
1100 case AMDGPUAS::CONSTANT_BUFFER_14:
1101 return 512 + 4096 * 14;
1102 case AMDGPUAS::CONSTANT_BUFFER_15:
1103 return 512 + 4096 * 15;
1104 default:
1105 return -1;
1106 }
1107}
1108
1109SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
1110{
1111 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001112 SDLoc DL(Op);
Tom Stellard365366f2013-01-23 02:09:06 +00001113 LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
1114 SDValue Chain = Op.getOperand(0);
1115 SDValue Ptr = Op.getOperand(1);
1116 SDValue LoweredLoad;
1117
1118 int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
1119 if (ConstantBlock > -1) {
1120 SDValue Result;
1121 if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
Vincent Lejeune743dca02013-03-05 15:04:29 +00001122 dyn_cast<Constant>(LoadNode->getSrcValue()) ||
1123 dyn_cast<ConstantSDNode>(Ptr)) {
Tom Stellard365366f2013-01-23 02:09:06 +00001124 SDValue Slots[4];
1125 for (unsigned i = 0; i < 4; i++) {
1126 // We want Const position encoded with the following formula :
1127 // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
1128 // const_index is Ptr computed by llvm using an alignment of 16.
1129 // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
1130 // then div by 4 at the ISel step
1131 SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
1132 DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
1133 Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
1134 }
1135 Result = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Slots, 4);
1136 } else {
1137 // non constant ptr cant be folded, keeps it as a v4f32 load
1138 Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
Vincent Lejeune743dca02013-03-05 15:04:29 +00001139 DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
Christian Konig189357c2013-03-07 09:03:59 +00001140 DAG.getConstant(LoadNode->getAddressSpace() -
NAKAMURA Takumi18ca09c2013-05-22 06:37:25 +00001141 AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
Tom Stellard365366f2013-01-23 02:09:06 +00001142 );
1143 }
1144
1145 if (!VT.isVector()) {
1146 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
1147 DAG.getConstant(0, MVT::i32));
1148 }
1149
1150 SDValue MergedValues[2] = {
1151 Result,
1152 Chain
1153 };
1154 return DAG.getMergeValues(MergedValues, 2, DL);
1155 }
1156
Tom Stellard84021442013-07-23 01:48:24 +00001157 // For most operations returning SDValue() will result int he node being
1158 // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so
1159 // we need to manually expand loads that may be legal in some address spaces
1160 // and illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported
1161 // for compute shaders, since the data is sign extended when it is uploaded
1162 // to the buffer. Howerver SEXT loads from other addresspaces are not
1163 // supported, so we need to expand them here.
1164 if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
1165 EVT MemVT = LoadNode->getMemoryVT();
1166 assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
1167 SDValue ShiftAmount =
1168 DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
1169 SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
1170 LoadNode->getPointerInfo(), MemVT,
1171 LoadNode->isVolatile(),
1172 LoadNode->isNonTemporal(),
1173 LoadNode->getAlignment());
1174 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
1175 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);
1176
1177 SDValue MergedValues[2] = { Sra, Chain };
1178 return DAG.getMergeValues(MergedValues, 2, DL);
1179 }
1180
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001181 if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1182 return SDValue();
1183 }
1184
1185 // Lowering for indirect addressing
1186 const MachineFunction &MF = DAG.getMachineFunction();
1187 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1188 getTargetMachine().getFrameLowering());
1189 unsigned StackWidth = TFL->getStackWidth(MF);
1190
1191 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1192
1193 if (VT.isVector()) {
1194 unsigned NumElemVT = VT.getVectorNumElements();
1195 EVT ElemVT = VT.getVectorElementType();
1196 SDValue Loads[4];
1197
1198 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1199 "vector width in load");
1200
1201 for (unsigned i = 0; i < NumElemVT; ++i) {
1202 unsigned Channel, PtrIncr;
1203 getStackAddress(StackWidth, i, Channel, PtrIncr);
1204 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1205 DAG.getConstant(PtrIncr, MVT::i32));
1206 Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
1207 Chain, Ptr,
1208 DAG.getTargetConstant(Channel, MVT::i32),
1209 Op.getOperand(2));
1210 }
1211 for (unsigned i = NumElemVT; i < 4; ++i) {
1212 Loads[i] = DAG.getUNDEF(ElemVT);
1213 }
1214 EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
1215 LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
1216 } else {
1217 LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
1218 Chain, Ptr,
1219 DAG.getTargetConstant(0, MVT::i32), // Channel
1220 Op.getOperand(2));
1221 }
1222
1223 SDValue Ops[2];
1224 Ops[0] = LoweredLoad;
1225 Ops[1] = Chain;
1226
1227 return DAG.getMergeValues(Ops, 2, DL);
Tom Stellard365366f2013-01-23 02:09:06 +00001228}
Tom Stellard75aadc22012-12-11 21:25:42 +00001229
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
///
/// Kernel arguments arrive in a constant buffer rather than in registers, so
/// every formal argument is lowered to a load from CONSTANT_BUFFER_0 at the
/// byte offset assigned by the calling-convention analysis.
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      SDLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());

  AnalyzeFormalArguments(CCInfo, Ins);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    EVT VT = VA.getLocVT();

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                                   AMDGPUAS::CONSTANT_BUFFER_0);

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.
    SDValue Arg = DAG.getLoad(VT, DL, Chain,
                           DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
                           MachinePointerInfo(UndefValue::get(PtrTy)), false,
                           false, false, 4); // 4 is the preferred alignment for
                                             // the CONSTANT memory space.
    InVals.push_back(Arg);
  }
  return Chain;
}
1264
Matt Arsenault758659232013-05-18 00:21:46 +00001265EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Tom Stellard75aadc22012-12-11 21:25:42 +00001266 if (!VT.isVector()) return MVT::i32;
1267 return VT.changeVectorElementTypeToInteger();
1268}
1269
/// First swizzle-optimization pass: replace BUILD_VECTOR lanes the hardware
/// can synthesize directly with undef, recording the replacement in
/// \p RemapSwizzle (old lane index -> new swizzle selector):
///   * constant 0.0 becomes selector 4 (SEL_0), constant 1.0 becomes 5 (SEL_1);
///   * a lane equal to an earlier lane is remapped onto that earlier lane.
static SDValue
CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
                        DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };

  for (unsigned i = 0; i < 4; i++) {
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    // Undef lanes (including those just produced above) need no dedup check.
    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      continue;
    // Deduplicate: point this lane at the first earlier lane with the same
    // value and mark it undef in the new vector.
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec, 4);
}
1307
Benjamin Kramer193960c2013-06-11 13:32:25 +00001308static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1309 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001310 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1311 assert(RemapSwizzle.empty());
1312 SDValue NewBldVec[4] = {
1313 VectorEntry.getOperand(0),
1314 VectorEntry.getOperand(1),
1315 VectorEntry.getOperand(2),
1316 VectorEntry.getOperand(3)
1317 };
1318 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001319 for (unsigned i = 0; i < 4; i++)
1320 RemapSwizzle[i] = i;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001321
1322 for (unsigned i = 0; i < 4; i++) {
1323 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1324 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1325 ->getZExtValue();
1326 if (!isUnmovable[Idx]) {
1327 // Swap i and Idx
1328 std::swap(NewBldVec[Idx], NewBldVec[i]);
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001329 std::swap(RemapSwizzle[RemapSwizzle[Idx]], RemapSwizzle[RemapSwizzle[i]]);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001330 }
1331 isUnmovable[Idx] = true;
1332 }
1333 }
1334
1335 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
1336 VectorEntry.getValueType(), NewBldVec, 4);
1337}
1338
1339
1340SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1341SDValue Swz[4], SelectionDAG &DAG) const {
1342 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1343 // Old -> New swizzle values
1344 DenseMap<unsigned, unsigned> SwizzleRemap;
1345
1346 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1347 for (unsigned i = 0; i < 4; i++) {
1348 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1349 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1350 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1351 }
1352
1353 SwizzleRemap.clear();
1354 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1355 for (unsigned i = 0; i < 4; i++) {
1356 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1357 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1358 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1359 }
1360
1361 return BuildVector;
1362}
1363
1364
Tom Stellard75aadc22012-12-11 21:25:42 +00001365//===----------------------------------------------------------------------===//
1366// Custom DAG Optimizations
1367//===----------------------------------------------------------------------===//
1368
1369SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1370 DAGCombinerInfo &DCI) const {
1371 SelectionDAG &DAG = DCI.DAG;
1372
1373 switch (N->getOpcode()) {
1374 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1375 case ISD::FP_ROUND: {
1376 SDValue Arg = N->getOperand(0);
1377 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001378 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001379 Arg.getOperand(0));
1380 }
1381 break;
1382 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001383
1384 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1385 // (i32 select_cc f32, f32, -1, 0 cc)
1386 //
1387 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1388 // this to one of the SET*_DX10 instructions.
1389 case ISD::FP_TO_SINT: {
1390 SDValue FNeg = N->getOperand(0);
1391 if (FNeg.getOpcode() != ISD::FNEG) {
1392 return SDValue();
1393 }
1394 SDValue SelectCC = FNeg.getOperand(0);
1395 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1396 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1397 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1398 !isHWTrueValue(SelectCC.getOperand(2)) ||
1399 !isHWFalseValue(SelectCC.getOperand(3))) {
1400 return SDValue();
1401 }
1402
Andrew Trickef9de2a2013-05-25 02:42:55 +00001403 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001404 SelectCC.getOperand(0), // LHS
1405 SelectCC.getOperand(1), // RHS
1406 DAG.getConstant(-1, MVT::i32), // True
1407 DAG.getConstant(0, MVT::i32), // Flase
1408 SelectCC.getOperand(4)); // CC
1409
1410 break;
1411 }
Tom Stellard365366f2013-01-23 02:09:06 +00001412 // Extract_vec (Build_vector) generated by custom lowering
1413 // also needs to be customly combined
1414 case ISD::EXTRACT_VECTOR_ELT: {
1415 SDValue Arg = N->getOperand(0);
1416 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1417 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1418 unsigned Element = Const->getZExtValue();
1419 return Arg->getOperand(Element);
1420 }
1421 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001422 if (Arg.getOpcode() == ISD::BITCAST &&
1423 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1424 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1425 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001426 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00001427 Arg->getOperand(0).getOperand(Element));
1428 }
1429 }
Tom Stellard365366f2013-01-23 02:09:06 +00001430 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001431
1432 case ISD::SELECT_CC: {
1433 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1434 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001435 //
1436 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1437 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001438 SDValue LHS = N->getOperand(0);
1439 if (LHS.getOpcode() != ISD::SELECT_CC) {
1440 return SDValue();
1441 }
1442
1443 SDValue RHS = N->getOperand(1);
1444 SDValue True = N->getOperand(2);
1445 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001446 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001447
1448 if (LHS.getOperand(2).getNode() != True.getNode() ||
1449 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00001450 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00001451 return SDValue();
1452 }
1453
Tom Stellard5e524892013-03-08 15:37:11 +00001454 switch (NCC) {
1455 default: return SDValue();
1456 case ISD::SETNE: return LHS;
1457 case ISD::SETEQ: {
1458 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1459 LHSCC = ISD::getSetCCInverse(LHSCC,
1460 LHS.getOperand(0).getValueType().isInteger());
Andrew Trickef9de2a2013-05-25 02:42:55 +00001461 return DAG.getSelectCC(SDLoc(N),
Tom Stellard5e524892013-03-08 15:37:11 +00001462 LHS.getOperand(0),
1463 LHS.getOperand(1),
1464 LHS.getOperand(2),
1465 LHS.getOperand(3),
1466 LHSCC);
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001467 }
Tom Stellard5e524892013-03-08 15:37:11 +00001468 }
1469 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001470 case AMDGPUISD::EXPORT: {
1471 SDValue Arg = N->getOperand(1);
1472 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1473 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001474
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001475 SDValue NewArgs[8] = {
1476 N->getOperand(0), // Chain
1477 SDValue(),
1478 N->getOperand(2), // ArrayBase
1479 N->getOperand(3), // Type
1480 N->getOperand(4), // SWZ_X
1481 N->getOperand(5), // SWZ_Y
1482 N->getOperand(6), // SWZ_Z
1483 N->getOperand(7) // SWZ_W
1484 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00001485 SDLoc DL(N);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001486 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001487 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
Tom Stellarde06163a2013-02-07 14:02:35 +00001488 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001489 case AMDGPUISD::TEXTURE_FETCH: {
1490 SDValue Arg = N->getOperand(1);
1491 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1492 break;
1493
1494 SDValue NewArgs[19] = {
1495 N->getOperand(0),
1496 N->getOperand(1),
1497 N->getOperand(2),
1498 N->getOperand(3),
1499 N->getOperand(4),
1500 N->getOperand(5),
1501 N->getOperand(6),
1502 N->getOperand(7),
1503 N->getOperand(8),
1504 N->getOperand(9),
1505 N->getOperand(10),
1506 N->getOperand(11),
1507 N->getOperand(12),
1508 N->getOperand(13),
1509 N->getOperand(14),
1510 N->getOperand(15),
1511 N->getOperand(16),
1512 N->getOperand(17),
1513 N->getOperand(18),
1514 };
1515 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
1516 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
1517 NewArgs, 19);
1518 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001519 }
1520 return SDValue();
1521}