blob: dd613d56a60d16653084fc8e81a17d0eba8595b6 [file] [log] [blame]
Tom Stellard75aadc22012-12-11 21:25:42 +00001//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Custom DAG lowering for R600
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
16#include "R600Defines.h"
17#include "R600InstrInfo.h"
18#include "R600MachineFunctionInfo.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000019#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000020#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000021#include "llvm/CodeGen/MachineInstrBuilder.h"
22#include "llvm/CodeGen/MachineRegisterInfo.h"
23#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000024#include "llvm/IR/Argument.h"
25#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000026
27using namespace llvm;
28
29R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000030 AMDGPUTargetLowering(TM),
31 Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000032 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
33 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
34 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
35 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
36 computeRegisterProperties();
37
38 setOperationAction(ISD::FADD, MVT::v4f32, Expand);
39 setOperationAction(ISD::FMUL, MVT::v4f32, Expand);
40 setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
41 setOperationAction(ISD::FSUB, MVT::v4f32, Expand);
42
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000043 setOperationAction(ISD::FCOS, MVT::f32, Custom);
44 setOperationAction(ISD::FSIN, MVT::f32, Custom);
45
Tom Stellarda8b03512012-12-21 16:33:24 +000046 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
47 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
48 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
49 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000050 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
51
Tom Stellard492ebea2013-03-08 15:37:07 +000052 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
53 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000054
55 setOperationAction(ISD::FSUB, MVT::f32, Expand);
56
57 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
58 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
59 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000060
Tom Stellard75aadc22012-12-11 21:25:42 +000061 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
62 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
63
Tom Stellarde8f9f282013-03-08 15:37:05 +000064 setOperationAction(ISD::SETCC, MVT::i32, Expand);
65 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000066 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
67
68 setOperationAction(ISD::SELECT, MVT::i32, Custom);
69 setOperationAction(ISD::SELECT, MVT::f32, Custom);
70
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000071 // Legalize loads and stores to the private address space.
72 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard6ec9e802013-06-20 21:55:23 +000073 setOperationAction(ISD::LOAD, MVT::v2i32, Expand);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000074 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellard1e803092013-07-23 01:48:18 +000075 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
76 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
77 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
78 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000079 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000080 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard6ec9e802013-06-20 21:55:23 +000081 setOperationAction(ISD::STORE, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000082 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
83
Tom Stellard365366f2013-01-23 02:09:06 +000084 setOperationAction(ISD::LOAD, MVT::i32, Custom);
85 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000086 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
87
Tom Stellard75aadc22012-12-11 21:25:42 +000088 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +000089 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +000090 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +000091 setTargetDAGCombine(ISD::SELECT_CC);
Tom Stellard75aadc22012-12-11 21:25:42 +000092
Michel Danzer49812b52013-07-10 16:37:07 +000093 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
94
Tom Stellardb852af52013-03-08 15:37:03 +000095 setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard87047f62013-04-24 23:56:18 +000096 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard75aadc22012-12-11 21:25:42 +000097 setSchedulingPreference(Sched::VLIW);
98}
99
/// \brief Expand R600 pseudo instructions into real machine instructions.
///
/// Called by the generic selector for every instruction whose descriptor has
/// the usesCustomInserter flag.  Each case rewrites one pseudo (modifier
/// MOVs, immediate materialization, LDS reads, texture derivatives, branch
/// pseudos, exports, ...) into real R600 instructions at the same point in
/// \p BB.  Cases that return early keep the original \p MI; otherwise it is
/// erased at the bottom and the (unchanged) block is returned.
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
          static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());

  switch (MI->getOpcode()) {
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case AMDGPU::CLAMP_R600: {
    // Clamp is not a separate instruction: emit a MOV and set the clamp bit
    // on its destination operand.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                   AMDGPU::MOV,
                                                   MI->getOperand(0).getReg(),
                                                   MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    // |x| is a MOV with the absolute-value source-modifier flag set.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    // -x is a MOV with the negate source-modifier flag set.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    // MASK_WRITE emits no code of its own: it locates the instruction that
    // defines its operand and masks that instruction's write.
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::LDS_READ_RET: {
    // LDS reads return their result in the dedicated OQAP register: re-emit
    // the read with OQAP as the explicit def, copying over the remaining
    // operands, then MOV OQAP into the pseudo's virtual destination.
    MachineInstrBuilder NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                                        TII->get(MI->getOpcode()),
                                        AMDGPU::OQAP);
    for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
      NewMI.addOperand(MI->getOperand(i));
    }
    TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV,
                                 MI->getOperand(0).getReg(),
                                 AMDGPU::OQAP);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    // Materialize a float immediate as a literal-constant MOV; the literal
    // is the raw IEEE bit pattern of the FP immediate.
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    // Materialize an integer immediate as a literal-constant MOV.
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    // Constant-buffer read: a MOV from ALU_CONST whose src0_sel immediate
    // selects which constant slot is read.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
        MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    // If the write is immediately followed by RETURN, fold the end-of-program
    // bit into the memory instruction.
    unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    // Texture sample with explicit derivatives: load the H and V gradients
    // into temporaries, then issue the gradient sample with both temps as
    // implicit uses so they stay live.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    // Default full XYZW swizzle and normalized coordinates; adjusted per
    // texture type below (shadow types move Z into W, arrays/rects disable
    // normalization on the affected axes).
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    // Same expansion as TXD but using the depth-compare sample opcode
    // (TEX_SAMPLE_C_G) for shadow lookups.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    // Unconditional branch lowers directly to JUMP.
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    // Float conditional branch: set PREDICATE_BIT with a pushing PRED_X
    // compare-against-zero, then issue a predicated JUMP_COND.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    // Integer variant of the conditional branch expansion above.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO_INT)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if it's not the last one of its type.
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    // Scan the rest of the block for a later export of the same type.
    for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = llvm::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // CF instruction encoding differs between Evergreen (84) and R600 (40).
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  MI->eraseFromParent();
  return BB;
}
470
471//===----------------------------------------------------------------------===//
472// Custom DAG Lowering Operations
473//===----------------------------------------------------------------------===//
474
Tom Stellard75aadc22012-12-11 21:25:42 +0000475SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000476 MachineFunction &MF = DAG.getMachineFunction();
477 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard75aadc22012-12-11 21:25:42 +0000478 switch (Op.getOpcode()) {
479 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000480 case ISD::FCOS:
481 case ISD::FSIN: return LowerTrig(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000482 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
483 case ISD::SELECT: return LowerSELECT(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000484 case ISD::STORE: return LowerSTORE(Op, DAG);
Tom Stellard365366f2013-01-23 02:09:06 +0000485 case ISD::LOAD: return LowerLOAD(Op, DAG);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000486 case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000487 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000488 case ISD::INTRINSIC_VOID: {
489 SDValue Chain = Op.getOperand(0);
490 unsigned IntrinsicID =
491 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
492 switch (IntrinsicID) {
493 case AMDGPUIntrinsic::AMDGPU_store_output: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000494 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
495 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000496 MFI->LiveOuts.push_back(Reg);
Andrew Trickef9de2a2013-05-25 02:42:55 +0000497 return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
Tom Stellard75aadc22012-12-11 21:25:42 +0000498 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000499 case AMDGPUIntrinsic::R600_store_swizzle: {
500 const SDValue Args[8] = {
501 Chain,
502 Op.getOperand(2), // Export Value
503 Op.getOperand(3), // ArrayBase
504 Op.getOperand(4), // Type
505 DAG.getConstant(0, MVT::i32), // SWZ_X
506 DAG.getConstant(1, MVT::i32), // SWZ_Y
507 DAG.getConstant(2, MVT::i32), // SWZ_Z
508 DAG.getConstant(3, MVT::i32) // SWZ_W
509 };
Andrew Trickef9de2a2013-05-25 02:42:55 +0000510 return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(),
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000511 Args, 8);
Tom Stellard75aadc22012-12-11 21:25:42 +0000512 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000513
Tom Stellard75aadc22012-12-11 21:25:42 +0000514 // default for switch(IntrinsicID)
515 default: break;
516 }
517 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
518 break;
519 }
520 case ISD::INTRINSIC_WO_CHAIN: {
521 unsigned IntrinsicID =
522 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
523 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000524 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000525 switch(IntrinsicID) {
526 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
527 case AMDGPUIntrinsic::R600_load_input: {
528 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
529 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Vincent Lejeuned3fcb502013-05-17 16:51:06 +0000530 MachineFunction &MF = DAG.getMachineFunction();
531 MachineRegisterInfo &MRI = MF.getRegInfo();
532 MRI.addLiveIn(Reg);
533 return DAG.getCopyFromReg(DAG.getEntryNode(),
Andrew Trickef9de2a2013-05-25 02:42:55 +0000534 SDLoc(DAG.getEntryNode()), Reg, VT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000535 }
Tom Stellard41afe6a2013-02-05 17:09:14 +0000536
537 case AMDGPUIntrinsic::R600_interp_input: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000538 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Tom Stellard41afe6a2013-02-05 17:09:14 +0000539 int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
540 MachineSDNode *interp;
541 if (ijb < 0) {
Bill Wendling37e9adb2013-06-07 20:28:55 +0000542 const MachineFunction &MF = DAG.getMachineFunction();
543 const R600InstrInfo *TII =
544 static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
Tom Stellard41afe6a2013-02-05 17:09:14 +0000545 interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
546 MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
547 return DAG.getTargetExtractSubreg(
548 TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
549 DL, MVT::f32, SDValue(interp, 0));
550 }
551
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000552 MachineFunction &MF = DAG.getMachineFunction();
553 MachineRegisterInfo &MRI = MF.getRegInfo();
554 unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
555 unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
556 MRI.addLiveIn(RegisterI);
557 MRI.addLiveIn(RegisterJ);
558 SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
559 SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
560 SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
561 SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);
562
Tom Stellard41afe6a2013-02-05 17:09:14 +0000563 if (slot % 4 < 2)
564 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
565 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000566 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000567 else
568 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
569 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000570 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000571 return SDValue(interp, slot % 2);
Tom Stellard75aadc22012-12-11 21:25:42 +0000572 }
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000573 case AMDGPUIntrinsic::R600_tex:
574 case AMDGPUIntrinsic::R600_texc:
575 case AMDGPUIntrinsic::R600_txl:
576 case AMDGPUIntrinsic::R600_txlc:
577 case AMDGPUIntrinsic::R600_txb:
578 case AMDGPUIntrinsic::R600_txbc:
579 case AMDGPUIntrinsic::R600_txf:
580 case AMDGPUIntrinsic::R600_txq:
581 case AMDGPUIntrinsic::R600_ddx:
582 case AMDGPUIntrinsic::R600_ddy: {
583 unsigned TextureOp;
584 switch (IntrinsicID) {
585 case AMDGPUIntrinsic::R600_tex:
586 TextureOp = 0;
587 break;
588 case AMDGPUIntrinsic::R600_texc:
589 TextureOp = 1;
590 break;
591 case AMDGPUIntrinsic::R600_txl:
592 TextureOp = 2;
593 break;
594 case AMDGPUIntrinsic::R600_txlc:
595 TextureOp = 3;
596 break;
597 case AMDGPUIntrinsic::R600_txb:
598 TextureOp = 4;
599 break;
600 case AMDGPUIntrinsic::R600_txbc:
601 TextureOp = 5;
602 break;
603 case AMDGPUIntrinsic::R600_txf:
604 TextureOp = 6;
605 break;
606 case AMDGPUIntrinsic::R600_txq:
607 TextureOp = 7;
608 break;
609 case AMDGPUIntrinsic::R600_ddx:
610 TextureOp = 8;
611 break;
612 case AMDGPUIntrinsic::R600_ddy:
613 TextureOp = 9;
614 break;
615 default:
616 llvm_unreachable("Unknow Texture Operation");
617 }
618
619 SDValue TexArgs[19] = {
620 DAG.getConstant(TextureOp, MVT::i32),
621 Op.getOperand(1),
622 DAG.getConstant(0, MVT::i32),
623 DAG.getConstant(1, MVT::i32),
624 DAG.getConstant(2, MVT::i32),
625 DAG.getConstant(3, MVT::i32),
626 Op.getOperand(2),
627 Op.getOperand(3),
628 Op.getOperand(4),
629 DAG.getConstant(0, MVT::i32),
630 DAG.getConstant(1, MVT::i32),
631 DAG.getConstant(2, MVT::i32),
632 DAG.getConstant(3, MVT::i32),
633 Op.getOperand(5),
634 Op.getOperand(6),
635 Op.getOperand(7),
636 Op.getOperand(8),
637 Op.getOperand(9),
638 Op.getOperand(10)
639 };
640 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
641 }
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000642 case AMDGPUIntrinsic::AMDGPU_dp4: {
643 SDValue Args[8] = {
644 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
645 DAG.getConstant(0, MVT::i32)),
646 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
647 DAG.getConstant(0, MVT::i32)),
648 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
649 DAG.getConstant(1, MVT::i32)),
650 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
651 DAG.getConstant(1, MVT::i32)),
652 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
653 DAG.getConstant(2, MVT::i32)),
654 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
655 DAG.getConstant(2, MVT::i32)),
656 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
657 DAG.getConstant(3, MVT::i32)),
658 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
659 DAG.getConstant(3, MVT::i32))
660 };
661 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8);
662 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000663
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000664 case Intrinsic::r600_read_ngroups_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000665 return LowerImplicitParameter(DAG, VT, DL, 0);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000666 case Intrinsic::r600_read_ngroups_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000667 return LowerImplicitParameter(DAG, VT, DL, 1);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000668 case Intrinsic::r600_read_ngroups_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000669 return LowerImplicitParameter(DAG, VT, DL, 2);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000670 case Intrinsic::r600_read_global_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000671 return LowerImplicitParameter(DAG, VT, DL, 3);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000672 case Intrinsic::r600_read_global_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000673 return LowerImplicitParameter(DAG, VT, DL, 4);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000674 case Intrinsic::r600_read_global_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000675 return LowerImplicitParameter(DAG, VT, DL, 5);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000676 case Intrinsic::r600_read_local_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000677 return LowerImplicitParameter(DAG, VT, DL, 6);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000678 case Intrinsic::r600_read_local_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000679 return LowerImplicitParameter(DAG, VT, DL, 7);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000680 case Intrinsic::r600_read_local_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000681 return LowerImplicitParameter(DAG, VT, DL, 8);
682
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000683 case Intrinsic::r600_read_tgid_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000684 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
685 AMDGPU::T1_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000686 case Intrinsic::r600_read_tgid_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000687 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
688 AMDGPU::T1_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000689 case Intrinsic::r600_read_tgid_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000690 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
691 AMDGPU::T1_Z, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000692 case Intrinsic::r600_read_tidig_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000693 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
694 AMDGPU::T0_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000695 case Intrinsic::r600_read_tidig_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000696 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
697 AMDGPU::T0_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000698 case Intrinsic::r600_read_tidig_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000699 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
700 AMDGPU::T0_Z, VT);
701 }
702 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
703 break;
704 }
705 } // end switch(Op.getOpcode())
706 return SDValue();
707}
708
709void R600TargetLowering::ReplaceNodeResults(SDNode *N,
710 SmallVectorImpl<SDValue> &Results,
711 SelectionDAG &DAG) const {
712 switch (N->getOpcode()) {
713 default: return;
714 case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
Tom Stellard365366f2013-01-23 02:09:06 +0000715 return;
716 case ISD::LOAD: {
717 SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
718 Results.push_back(SDValue(Node, 0));
719 Results.push_back(SDValue(Node, 1));
720 // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
721 // function
722 DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
723 return;
724 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000725 case ISD::STORE:
726 SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
727 Results.push_back(SDValue(Node, 0));
728 return;
Tom Stellard75aadc22012-12-11 21:25:42 +0000729 }
730}
731
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000732SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
733 // On hw >= R700, COS/SIN input must be between -1. and 1.
734 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
735 EVT VT = Op.getValueType();
736 SDValue Arg = Op.getOperand(0);
737 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
738 DAG.getNode(ISD::FADD, SDLoc(Op), VT,
739 DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
740 DAG.getConstantFP(0.15915494309, MVT::f32)),
741 DAG.getConstantFP(0.5, MVT::f32)));
742 unsigned TrigNode;
743 switch (Op.getOpcode()) {
744 case ISD::FCOS:
745 TrigNode = AMDGPUISD::COS_HW;
746 break;
747 case ISD::FSIN:
748 TrigNode = AMDGPUISD::SIN_HW;
749 break;
750 default:
751 llvm_unreachable("Wrong trig opcode");
752 }
753 SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
754 DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
755 DAG.getConstantFP(-0.5, MVT::f32)));
756 if (Gen >= AMDGPUSubtarget::R700)
757 return TrigVal;
758 // On R600 hw, COS/SIN input must be between -Pi and Pi.
759 return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
760 DAG.getConstantFP(3.14159265359, MVT::f32));
761}
762
Tom Stellard75aadc22012-12-11 21:25:42 +0000763SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
764 return DAG.getNode(
765 ISD::SETCC,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000766 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +0000767 MVT::i1,
768 Op, DAG.getConstantFP(0.0f, MVT::f32),
769 DAG.getCondCode(ISD::SETNE)
770 );
771}
772
Tom Stellard75aadc22012-12-11 21:25:42 +0000773SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000774 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +0000775 unsigned DwordOffset) const {
776 unsigned ByteOffset = DwordOffset * 4;
777 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +0000778 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +0000779
780 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
781 assert(isInt<16>(ByteOffset));
782
783 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
784 DAG.getConstant(ByteOffset, MVT::i32), // PTR
785 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
786 false, false, false, 0);
787}
788
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000789SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
790
791 MachineFunction &MF = DAG.getMachineFunction();
792 const AMDGPUFrameLowering *TFL =
793 static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
794
795 FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
796 assert(FIN);
797
798 unsigned FrameIndex = FIN->getIndex();
799 unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
800 return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
801}
802
Tom Stellard75aadc22012-12-11 21:25:42 +0000803bool R600TargetLowering::isZero(SDValue Op) const {
804 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
805 return Cst->isNullValue();
806 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
807 return CstFP->isZero();
808 } else {
809 return false;
810 }
811}
812
813SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +0000814 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000815 EVT VT = Op.getValueType();
816
817 SDValue LHS = Op.getOperand(0);
818 SDValue RHS = Op.getOperand(1);
819 SDValue True = Op.getOperand(2);
820 SDValue False = Op.getOperand(3);
821 SDValue CC = Op.getOperand(4);
822 SDValue Temp;
823
824 // LHS and RHS are guaranteed to be the same value type
825 EVT CompareVT = LHS.getValueType();
826
827 // Check if we can lower this to a native operation.
828
Tom Stellard2add82d2013-03-08 15:37:09 +0000829 // Try to lower to a SET* instruction:
830 //
831 // SET* can match the following patterns:
832 //
833 // select_cc f32, f32, -1, 0, cc_any
834 // select_cc f32, f32, 1.0f, 0.0f, cc_any
835 // select_cc i32, i32, -1, 0, cc_any
836 //
837
838 // Move hardware True/False values to the correct operand.
839 if (isHWTrueValue(False) && isHWFalseValue(True)) {
840 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
841 std::swap(False, True);
842 CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
843 }
844
845 if (isHWTrueValue(True) && isHWFalseValue(False) &&
846 (CompareVT == VT || VT == MVT::i32)) {
847 // This can be matched by a SET* instruction.
848 return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
849 }
850
Tom Stellard75aadc22012-12-11 21:25:42 +0000851 // Try to lower to a CND* instruction:
Tom Stellard2add82d2013-03-08 15:37:09 +0000852 //
853 // CND* can match the following patterns:
854 //
855 // select_cc f32, 0.0, f32, f32, cc_any
856 // select_cc f32, 0.0, i32, i32, cc_any
857 // select_cc i32, 0, f32, f32, cc_any
858 // select_cc i32, 0, i32, i32, cc_any
859 //
Tom Stellard75aadc22012-12-11 21:25:42 +0000860 if (isZero(LHS) || isZero(RHS)) {
861 SDValue Cond = (isZero(LHS) ? RHS : LHS);
862 SDValue Zero = (isZero(LHS) ? LHS : RHS);
863 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
864 if (CompareVT != VT) {
865 // Bitcast True / False to the correct types. This will end up being
866 // a nop, but it allows us to define only a single pattern in the
867 // .TD files for each CND* instruction rather than having to have
868 // one pattern for integer True/False and one for fp True/False
869 True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
870 False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
871 }
872 if (isZero(LHS)) {
873 CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
874 }
875
876 switch (CCOpcode) {
877 case ISD::SETONE:
878 case ISD::SETUNE:
879 case ISD::SETNE:
880 case ISD::SETULE:
881 case ISD::SETULT:
882 case ISD::SETOLE:
883 case ISD::SETOLT:
884 case ISD::SETLE:
885 case ISD::SETLT:
886 CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
887 Temp = True;
888 True = False;
889 False = Temp;
890 break;
891 default:
892 break;
893 }
894 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
895 Cond, Zero,
896 True, False,
897 DAG.getCondCode(CCOpcode));
898 return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
899 }
900
Tom Stellard75aadc22012-12-11 21:25:42 +0000901
902 // Possible Min/Max pattern
903 SDValue MinMax = LowerMinMax(Op, DAG);
904 if (MinMax.getNode()) {
905 return MinMax;
906 }
907
908 // If we make it this for it means we have no native instructions to handle
909 // this SELECT_CC, so we must lower it.
910 SDValue HWTrue, HWFalse;
911
912 if (CompareVT == MVT::f32) {
913 HWTrue = DAG.getConstantFP(1.0f, CompareVT);
914 HWFalse = DAG.getConstantFP(0.0f, CompareVT);
915 } else if (CompareVT == MVT::i32) {
916 HWTrue = DAG.getConstant(-1, CompareVT);
917 HWFalse = DAG.getConstant(0, CompareVT);
918 }
919 else {
920 assert(!"Unhandled value type in LowerSELECT_CC");
921 }
922
923 // Lower this unsupported SELECT_CC into a combination of two supported
924 // SELECT_CC operations.
925 SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
926
927 return DAG.getNode(ISD::SELECT_CC, DL, VT,
928 Cond, HWFalse,
929 True, False,
930 DAG.getCondCode(ISD::SETNE));
931}
932
933SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
934 return DAG.getNode(ISD::SELECT_CC,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000935 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +0000936 Op.getValueType(),
937 Op.getOperand(0),
938 DAG.getConstant(0, MVT::i32),
939 Op.getOperand(1),
940 Op.getOperand(2),
941 DAG.getCondCode(ISD::SETNE));
942}
943
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000944/// LLVM generates byte-addresed pointers. For indirect addressing, we need to
945/// convert these pointers to a register index. Each register holds
946/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
947/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
948/// for indirect addressing.
949SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
950 unsigned StackWidth,
951 SelectionDAG &DAG) const {
952 unsigned SRLPad;
953 switch(StackWidth) {
954 case 1:
955 SRLPad = 2;
956 break;
957 case 2:
958 SRLPad = 3;
959 break;
960 case 4:
961 SRLPad = 4;
962 break;
963 default: llvm_unreachable("Invalid stack width");
964 }
965
Andrew Trickef9de2a2013-05-25 02:42:55 +0000966 return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000967 DAG.getConstant(SRLPad, MVT::i32));
968}
969
970void R600TargetLowering::getStackAddress(unsigned StackWidth,
971 unsigned ElemIdx,
972 unsigned &Channel,
973 unsigned &PtrIncr) const {
974 switch (StackWidth) {
975 default:
976 case 1:
977 Channel = 0;
978 if (ElemIdx > 0) {
979 PtrIncr = 1;
980 } else {
981 PtrIncr = 0;
982 }
983 break;
984 case 2:
985 Channel = ElemIdx % 2;
986 if (ElemIdx == 2) {
987 PtrIncr = 1;
988 } else {
989 PtrIncr = 0;
990 }
991 break;
992 case 4:
993 Channel = ElemIdx;
994 PtrIncr = 0;
995 break;
996 }
997}
998
Tom Stellard75aadc22012-12-11 21:25:42 +0000999SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001000 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001001 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1002 SDValue Chain = Op.getOperand(0);
1003 SDValue Value = Op.getOperand(1);
1004 SDValue Ptr = Op.getOperand(2);
1005
1006 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
1007 Ptr->getOpcode() != AMDGPUISD::DWORDADDR) {
1008 // Convert pointer from byte address to dword address.
1009 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1010 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
1011 Ptr, DAG.getConstant(2, MVT::i32)));
1012
1013 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
1014 assert(!"Truncated and indexed stores not supported yet");
1015 } else {
1016 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1017 }
1018 return Chain;
1019 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001020
1021 EVT ValueVT = Value.getValueType();
1022
1023 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1024 return SDValue();
1025 }
1026
1027 // Lowering for indirect addressing
1028
1029 const MachineFunction &MF = DAG.getMachineFunction();
1030 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1031 getTargetMachine().getFrameLowering());
1032 unsigned StackWidth = TFL->getStackWidth(MF);
1033
1034 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1035
1036 if (ValueVT.isVector()) {
1037 unsigned NumElemVT = ValueVT.getVectorNumElements();
1038 EVT ElemVT = ValueVT.getVectorElementType();
1039 SDValue Stores[4];
1040
1041 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1042 "vector width in load");
1043
1044 for (unsigned i = 0; i < NumElemVT; ++i) {
1045 unsigned Channel, PtrIncr;
1046 getStackAddress(StackWidth, i, Channel, PtrIncr);
1047 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1048 DAG.getConstant(PtrIncr, MVT::i32));
1049 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
1050 Value, DAG.getConstant(i, MVT::i32));
1051
1052 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1053 Chain, Elem, Ptr,
1054 DAG.getTargetConstant(Channel, MVT::i32));
1055 }
1056 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
1057 } else {
1058 if (ValueVT == MVT::i8) {
1059 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1060 }
1061 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
NAKAMURA Takumi18ca09c2013-05-22 06:37:25 +00001062 DAG.getTargetConstant(0, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001063 }
1064
1065 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001066}
1067
Tom Stellard365366f2013-01-23 02:09:06 +00001068// return (512 + (kc_bank << 12)
1069static int
1070ConstantAddressBlock(unsigned AddressSpace) {
1071 switch (AddressSpace) {
1072 case AMDGPUAS::CONSTANT_BUFFER_0:
1073 return 512;
1074 case AMDGPUAS::CONSTANT_BUFFER_1:
1075 return 512 + 4096;
1076 case AMDGPUAS::CONSTANT_BUFFER_2:
1077 return 512 + 4096 * 2;
1078 case AMDGPUAS::CONSTANT_BUFFER_3:
1079 return 512 + 4096 * 3;
1080 case AMDGPUAS::CONSTANT_BUFFER_4:
1081 return 512 + 4096 * 4;
1082 case AMDGPUAS::CONSTANT_BUFFER_5:
1083 return 512 + 4096 * 5;
1084 case AMDGPUAS::CONSTANT_BUFFER_6:
1085 return 512 + 4096 * 6;
1086 case AMDGPUAS::CONSTANT_BUFFER_7:
1087 return 512 + 4096 * 7;
1088 case AMDGPUAS::CONSTANT_BUFFER_8:
1089 return 512 + 4096 * 8;
1090 case AMDGPUAS::CONSTANT_BUFFER_9:
1091 return 512 + 4096 * 9;
1092 case AMDGPUAS::CONSTANT_BUFFER_10:
1093 return 512 + 4096 * 10;
1094 case AMDGPUAS::CONSTANT_BUFFER_11:
1095 return 512 + 4096 * 11;
1096 case AMDGPUAS::CONSTANT_BUFFER_12:
1097 return 512 + 4096 * 12;
1098 case AMDGPUAS::CONSTANT_BUFFER_13:
1099 return 512 + 4096 * 13;
1100 case AMDGPUAS::CONSTANT_BUFFER_14:
1101 return 512 + 4096 * 14;
1102 case AMDGPUAS::CONSTANT_BUFFER_15:
1103 return 512 + 4096 * 15;
1104 default:
1105 return -1;
1106 }
1107}
1108
1109SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
1110{
1111 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001112 SDLoc DL(Op);
Tom Stellard365366f2013-01-23 02:09:06 +00001113 LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
1114 SDValue Chain = Op.getOperand(0);
1115 SDValue Ptr = Op.getOperand(1);
1116 SDValue LoweredLoad;
1117
1118 int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
1119 if (ConstantBlock > -1) {
1120 SDValue Result;
1121 if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
Vincent Lejeune743dca02013-03-05 15:04:29 +00001122 dyn_cast<Constant>(LoadNode->getSrcValue()) ||
1123 dyn_cast<ConstantSDNode>(Ptr)) {
Tom Stellard365366f2013-01-23 02:09:06 +00001124 SDValue Slots[4];
1125 for (unsigned i = 0; i < 4; i++) {
1126 // We want Const position encoded with the following formula :
1127 // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
1128 // const_index is Ptr computed by llvm using an alignment of 16.
1129 // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
1130 // then div by 4 at the ISel step
1131 SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
1132 DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
1133 Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
1134 }
1135 Result = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Slots, 4);
1136 } else {
1137 // non constant ptr cant be folded, keeps it as a v4f32 load
1138 Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
Vincent Lejeune743dca02013-03-05 15:04:29 +00001139 DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
Christian Konig189357c2013-03-07 09:03:59 +00001140 DAG.getConstant(LoadNode->getAddressSpace() -
NAKAMURA Takumi18ca09c2013-05-22 06:37:25 +00001141 AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
Tom Stellard365366f2013-01-23 02:09:06 +00001142 );
1143 }
1144
1145 if (!VT.isVector()) {
1146 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
1147 DAG.getConstant(0, MVT::i32));
1148 }
1149
1150 SDValue MergedValues[2] = {
1151 Result,
1152 Chain
1153 };
1154 return DAG.getMergeValues(MergedValues, 2, DL);
1155 }
1156
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001157 if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1158 return SDValue();
1159 }
1160
1161 // Lowering for indirect addressing
1162 const MachineFunction &MF = DAG.getMachineFunction();
1163 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1164 getTargetMachine().getFrameLowering());
1165 unsigned StackWidth = TFL->getStackWidth(MF);
1166
1167 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1168
1169 if (VT.isVector()) {
1170 unsigned NumElemVT = VT.getVectorNumElements();
1171 EVT ElemVT = VT.getVectorElementType();
1172 SDValue Loads[4];
1173
1174 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1175 "vector width in load");
1176
1177 for (unsigned i = 0; i < NumElemVT; ++i) {
1178 unsigned Channel, PtrIncr;
1179 getStackAddress(StackWidth, i, Channel, PtrIncr);
1180 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1181 DAG.getConstant(PtrIncr, MVT::i32));
1182 Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
1183 Chain, Ptr,
1184 DAG.getTargetConstant(Channel, MVT::i32),
1185 Op.getOperand(2));
1186 }
1187 for (unsigned i = NumElemVT; i < 4; ++i) {
1188 Loads[i] = DAG.getUNDEF(ElemVT);
1189 }
1190 EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
1191 LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
1192 } else {
1193 LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
1194 Chain, Ptr,
1195 DAG.getTargetConstant(0, MVT::i32), // Channel
1196 Op.getOperand(2));
1197 }
1198
1199 SDValue Ops[2];
1200 Ops[0] = LoweredLoad;
1201 Ops[1] = Chain;
1202
1203 return DAG.getMergeValues(Ops, 2, DL);
Tom Stellard365366f2013-01-23 02:09:06 +00001204}
Tom Stellard75aadc22012-12-11 21:25:42 +00001205
Tom Stellard75aadc22012-12-11 21:25:42 +00001206/// XXX Only kernel functions are supported, so we can assume for now that
1207/// every function is a kernel function, but in the future we should use
1208/// separate calling conventions for kernel and non-kernel functions.
1209SDValue R600TargetLowering::LowerFormalArguments(
1210 SDValue Chain,
1211 CallingConv::ID CallConv,
1212 bool isVarArg,
1213 const SmallVectorImpl<ISD::InputArg> &Ins,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001214 SDLoc DL, SelectionDAG &DAG,
Tom Stellard75aadc22012-12-11 21:25:42 +00001215 SmallVectorImpl<SDValue> &InVals) const {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001216 SmallVector<CCValAssign, 16> ArgLocs;
1217 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
1218 getTargetMachine(), ArgLocs, *DAG.getContext());
1219
1220 AnalyzeFormalArguments(CCInfo, Ins);
1221
Tom Stellard1e803092013-07-23 01:48:18 +00001222 for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001223 CCValAssign &VA = ArgLocs[i];
1224 EVT VT = VA.getLocVT();
Tom Stellard78e01292013-07-23 01:47:58 +00001225
Tom Stellard75aadc22012-12-11 21:25:42 +00001226 PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +00001227 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellardacfeebf2013-07-23 01:48:05 +00001228
1229 // The first 36 bytes of the input buffer contains information about
1230 // thread group and global sizes.
Tom Stellard1e803092013-07-23 01:48:18 +00001231 SDValue Arg = DAG.getLoad(VT, DL, Chain,
Tom Stellardacfeebf2013-07-23 01:48:05 +00001232 DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
Tom Stellard1e803092013-07-23 01:48:18 +00001233 MachinePointerInfo(UndefValue::get(PtrTy)), false,
1234 false, false, 4); // 4 is the prefered alignment for
1235 // the CONSTANT memory space.
Tom Stellard75aadc22012-12-11 21:25:42 +00001236 InVals.push_back(Arg);
Tom Stellard75aadc22012-12-11 21:25:42 +00001237 }
1238 return Chain;
1239}
1240
Matt Arsenault758659232013-05-18 00:21:46 +00001241EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Tom Stellard75aadc22012-12-11 21:25:42 +00001242 if (!VT.isVector()) return MVT::i32;
1243 return VT.changeVectorElementTypeToInteger();
1244}
1245
Benjamin Kramer193960c2013-06-11 13:32:25 +00001246static SDValue
1247CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
1248 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001249 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1250 assert(RemapSwizzle.empty());
1251 SDValue NewBldVec[4] = {
1252 VectorEntry.getOperand(0),
1253 VectorEntry.getOperand(1),
1254 VectorEntry.getOperand(2),
1255 VectorEntry.getOperand(3)
1256 };
1257
1258 for (unsigned i = 0; i < 4; i++) {
1259 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1260 if (C->isZero()) {
1261 RemapSwizzle[i] = 4; // SEL_0
1262 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1263 } else if (C->isExactlyValue(1.0)) {
1264 RemapSwizzle[i] = 5; // SEL_1
1265 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1266 }
1267 }
1268
1269 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1270 continue;
1271 for (unsigned j = 0; j < i; j++) {
1272 if (NewBldVec[i] == NewBldVec[j]) {
1273 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1274 RemapSwizzle[i] = j;
1275 break;
1276 }
1277 }
1278 }
1279
1280 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
1281 VectorEntry.getValueType(), NewBldVec, 4);
1282}
1283
Benjamin Kramer193960c2013-06-11 13:32:25 +00001284static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1285 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001286 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1287 assert(RemapSwizzle.empty());
1288 SDValue NewBldVec[4] = {
1289 VectorEntry.getOperand(0),
1290 VectorEntry.getOperand(1),
1291 VectorEntry.getOperand(2),
1292 VectorEntry.getOperand(3)
1293 };
1294 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001295 for (unsigned i = 0; i < 4; i++)
1296 RemapSwizzle[i] = i;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001297
1298 for (unsigned i = 0; i < 4; i++) {
1299 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1300 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1301 ->getZExtValue();
1302 if (!isUnmovable[Idx]) {
1303 // Swap i and Idx
1304 std::swap(NewBldVec[Idx], NewBldVec[i]);
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001305 std::swap(RemapSwizzle[RemapSwizzle[Idx]], RemapSwizzle[RemapSwizzle[i]]);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001306 }
1307 isUnmovable[Idx] = true;
1308 }
1309 }
1310
1311 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
1312 VectorEntry.getValueType(), NewBldVec, 4);
1313}
1314
1315
1316SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1317SDValue Swz[4], SelectionDAG &DAG) const {
1318 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1319 // Old -> New swizzle values
1320 DenseMap<unsigned, unsigned> SwizzleRemap;
1321
1322 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1323 for (unsigned i = 0; i < 4; i++) {
1324 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1325 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1326 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1327 }
1328
1329 SwizzleRemap.clear();
1330 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1331 for (unsigned i = 0; i < 4; i++) {
1332 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1333 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1334 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1335 }
1336
1337 return BuildVector;
1338}
1339
1340
Tom Stellard75aadc22012-12-11 21:25:42 +00001341//===----------------------------------------------------------------------===//
1342// Custom DAG Optimizations
1343//===----------------------------------------------------------------------===//
1344
1345SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1346 DAGCombinerInfo &DCI) const {
1347 SelectionDAG &DAG = DCI.DAG;
1348
1349 switch (N->getOpcode()) {
1350 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1351 case ISD::FP_ROUND: {
1352 SDValue Arg = N->getOperand(0);
1353 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001354 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001355 Arg.getOperand(0));
1356 }
1357 break;
1358 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001359
1360 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1361 // (i32 select_cc f32, f32, -1, 0 cc)
1362 //
1363 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1364 // this to one of the SET*_DX10 instructions.
1365 case ISD::FP_TO_SINT: {
1366 SDValue FNeg = N->getOperand(0);
1367 if (FNeg.getOpcode() != ISD::FNEG) {
1368 return SDValue();
1369 }
1370 SDValue SelectCC = FNeg.getOperand(0);
1371 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1372 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1373 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1374 !isHWTrueValue(SelectCC.getOperand(2)) ||
1375 !isHWFalseValue(SelectCC.getOperand(3))) {
1376 return SDValue();
1377 }
1378
Andrew Trickef9de2a2013-05-25 02:42:55 +00001379 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001380 SelectCC.getOperand(0), // LHS
1381 SelectCC.getOperand(1), // RHS
1382 DAG.getConstant(-1, MVT::i32), // True
1383 DAG.getConstant(0, MVT::i32), // Flase
1384 SelectCC.getOperand(4)); // CC
1385
1386 break;
1387 }
Tom Stellard365366f2013-01-23 02:09:06 +00001388 // Extract_vec (Build_vector) generated by custom lowering
1389 // also needs to be customly combined
1390 case ISD::EXTRACT_VECTOR_ELT: {
1391 SDValue Arg = N->getOperand(0);
1392 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1393 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1394 unsigned Element = Const->getZExtValue();
1395 return Arg->getOperand(Element);
1396 }
1397 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001398 if (Arg.getOpcode() == ISD::BITCAST &&
1399 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1400 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1401 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001402 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00001403 Arg->getOperand(0).getOperand(Element));
1404 }
1405 }
Tom Stellard365366f2013-01-23 02:09:06 +00001406 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001407
1408 case ISD::SELECT_CC: {
1409 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1410 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001411 //
1412 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1413 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001414 SDValue LHS = N->getOperand(0);
1415 if (LHS.getOpcode() != ISD::SELECT_CC) {
1416 return SDValue();
1417 }
1418
1419 SDValue RHS = N->getOperand(1);
1420 SDValue True = N->getOperand(2);
1421 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001422 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001423
1424 if (LHS.getOperand(2).getNode() != True.getNode() ||
1425 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00001426 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00001427 return SDValue();
1428 }
1429
Tom Stellard5e524892013-03-08 15:37:11 +00001430 switch (NCC) {
1431 default: return SDValue();
1432 case ISD::SETNE: return LHS;
1433 case ISD::SETEQ: {
1434 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1435 LHSCC = ISD::getSetCCInverse(LHSCC,
1436 LHS.getOperand(0).getValueType().isInteger());
Andrew Trickef9de2a2013-05-25 02:42:55 +00001437 return DAG.getSelectCC(SDLoc(N),
Tom Stellard5e524892013-03-08 15:37:11 +00001438 LHS.getOperand(0),
1439 LHS.getOperand(1),
1440 LHS.getOperand(2),
1441 LHS.getOperand(3),
1442 LHSCC);
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001443 }
Tom Stellard5e524892013-03-08 15:37:11 +00001444 }
1445 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001446 case AMDGPUISD::EXPORT: {
1447 SDValue Arg = N->getOperand(1);
1448 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1449 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001450
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001451 SDValue NewArgs[8] = {
1452 N->getOperand(0), // Chain
1453 SDValue(),
1454 N->getOperand(2), // ArrayBase
1455 N->getOperand(3), // Type
1456 N->getOperand(4), // SWZ_X
1457 N->getOperand(5), // SWZ_Y
1458 N->getOperand(6), // SWZ_Z
1459 N->getOperand(7) // SWZ_W
1460 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00001461 SDLoc DL(N);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001462 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001463 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
Tom Stellarde06163a2013-02-07 14:02:35 +00001464 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001465 case AMDGPUISD::TEXTURE_FETCH: {
1466 SDValue Arg = N->getOperand(1);
1467 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1468 break;
1469
1470 SDValue NewArgs[19] = {
1471 N->getOperand(0),
1472 N->getOperand(1),
1473 N->getOperand(2),
1474 N->getOperand(3),
1475 N->getOperand(4),
1476 N->getOperand(5),
1477 N->getOperand(6),
1478 N->getOperand(7),
1479 N->getOperand(8),
1480 N->getOperand(9),
1481 N->getOperand(10),
1482 N->getOperand(11),
1483 N->getOperand(12),
1484 N->getOperand(13),
1485 N->getOperand(14),
1486 N->getOperand(15),
1487 N->getOperand(16),
1488 N->getOperand(17),
1489 N->getOperand(18),
1490 };
1491 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
1492 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
1493 NewArgs, 19);
1494 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001495 }
1496 return SDValue();
1497}