blob: ce2aa9208f8ff47d390c39b40b7aaed79b8fed8c [file] [log] [blame]
Tom Stellard75aadc22012-12-11 21:25:42 +00001//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Custom DAG lowering for R600
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
16#include "R600Defines.h"
17#include "R600InstrInfo.h"
18#include "R600MachineFunctionInfo.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000019#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000020#include "llvm/CodeGen/MachineInstrBuilder.h"
21#include "llvm/CodeGen/MachineRegisterInfo.h"
22#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000023#include "llvm/IR/Argument.h"
24#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000025
26using namespace llvm;
27
28R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
Bill Wendling37e9adb2013-06-07 20:28:55 +000029 AMDGPUTargetLowering(TM) {
Tom Stellard75aadc22012-12-11 21:25:42 +000030 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
31 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
32 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
33 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
34 computeRegisterProperties();
35
36 setOperationAction(ISD::FADD, MVT::v4f32, Expand);
37 setOperationAction(ISD::FMUL, MVT::v4f32, Expand);
38 setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
39 setOperationAction(ISD::FSUB, MVT::v4f32, Expand);
40
Tom Stellarda8b03512012-12-21 16:33:24 +000041 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
42 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
43 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
44 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000045 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
46
Tom Stellard492ebea2013-03-08 15:37:07 +000047 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
48 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000049
50 setOperationAction(ISD::FSUB, MVT::f32, Expand);
51
52 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
53 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
54 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000055
Tom Stellard75aadc22012-12-11 21:25:42 +000056 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
57 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
58
Tom Stellarde8f9f282013-03-08 15:37:05 +000059 setOperationAction(ISD::SETCC, MVT::i32, Expand);
60 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000061 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
62
63 setOperationAction(ISD::SELECT, MVT::i32, Custom);
64 setOperationAction(ISD::SELECT, MVT::f32, Custom);
65
Tom Stellarda99c6ae2013-05-10 02:09:24 +000066 setOperationAction(ISD::VSELECT, MVT::v4i32, Expand);
67 setOperationAction(ISD::VSELECT, MVT::v2i32, Expand);
68
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000069 // Legalize loads and stores to the private address space.
70 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard6ec9e802013-06-20 21:55:23 +000071 setOperationAction(ISD::LOAD, MVT::v2i32, Expand);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000072 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
73 setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Custom);
74 setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
75 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
76 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Custom);
77 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000078 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard6ec9e802013-06-20 21:55:23 +000079 setOperationAction(ISD::STORE, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000080 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
81
Tom Stellard365366f2013-01-23 02:09:06 +000082 setOperationAction(ISD::LOAD, MVT::i32, Custom);
83 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000084 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
85
Tom Stellard75aadc22012-12-11 21:25:42 +000086 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +000087 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +000088 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +000089 setTargetDAGCombine(ISD::SELECT_CC);
Tom Stellard75aadc22012-12-11 21:25:42 +000090
Tom Stellardb852af52013-03-08 15:37:03 +000091 setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard87047f62013-04-24 23:56:18 +000092 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard75aadc22012-12-11 21:25:42 +000093 setSchedulingPreference(Sched::VLIW);
94}
95
/// Expand pseudo-instructions that could not be selected directly.  Each
/// handled pseudo is rewritten into real R600 machine instructions in place;
/// unless a case returns early, the original MI is erased at the end.
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
    static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());

  switch (MI->getOpcode()) {
  // Anything not listed here falls back to the common AMDGPU expansion.
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  // CLAMP/FABS/FNEG pseudos become a plain MOV carrying the matching
  // source-modifier flag, which the hardware applies for free.
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  // MASK_WRITE does not emit anything itself: it marks the instruction that
  // defines its operand with the MASK flag so the write is suppressed.
  case AMDGPU::MASK_WRITE: {
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  // An LDS read returns its result in the OQAP special register; re-emit the
  // read targeting OQAP, then copy OQAP into the pseudo's destination vreg.
  case AMDGPU::LDS_READ_RET: {
    MachineInstrBuilder NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                                        TII->get(MI->getOpcode()),
                                        AMDGPU::OQAP);
    for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
      NewMI.addOperand(MI->getOperand(i));
    }
    TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV,
                                 MI->getOperand(0).getReg(),
                                 AMDGPU::OQAP);
    break;
  }

  // Immediate moves: materialize the constant through the literal slot.
  case AMDGPU::MOV_IMM_F32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  // Copy from the constant buffer: a MOV from ALU_CONST whose src0_sel
  // immediate selects which constant to read.
  case AMDGPU::CONST_COPY: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
        MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    // If the next instruction is a RETURN, fold the end-of-program bit into
    // this export-style write.
    unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  // TXD: sample with explicit derivatives.  Load the H and V gradients into
  // temporaries, then issue the gradient sample which uses them implicitly.
  case AMDGPU::TXD: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    // Adjust source swizzle and coordinate-type bits per texture target
    // (shadow targets read the comparison value from Z, arrays/rects use
    // unnormalized coordinates on some axes).
    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            // Implicit uses keep the gradient loads alive and ordered before
            // the sample.
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  // Same expansion as TXD but ending in a shadow-compare gradient sample.
  case AMDGPU::TXD_SHADOW: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  // Unconditional branch maps straight onto JUMP.
  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0));
    break;

  // Conditional branches: evaluate the condition into PREDICATE_BIT with a
  // PRED_X (pushing the predicate stack), then issue a predicated jump.
  case AMDGPU::BRANCH_COND_f32: {
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO_INT)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if its not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    // Scan forward for a later export of the same type in this block.
    for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = llvm::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // CF instruction encoding differs between Evergreen (84) and R600 (40).
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    // Keep the (now augmented) RETURN itself; do not fall through to the
    // erase below.
    return BB;
  }
  }

  // The pseudo has been replaced by real instructions; drop it.
  MI->eraseFromParent();
  return BB;
}
466
467//===----------------------------------------------------------------------===//
468// Custom DAG Lowering Operations
469//===----------------------------------------------------------------------===//
470
/// Custom-lower the DAG operations the constructor marked Custom, plus the
/// R600-specific intrinsics.  Returns an empty SDValue for intrinsics that
/// need no lowering (the node is used as-is).
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::SELECT: return LowerSELECT(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: return LowerLOAD(Op, DAG);
  case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    // Store a shader output: record the physical register as live-out so
    // RETURN can carry it, and copy the value into it.
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MFI->LiveOuts.push_back(Reg);
      return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
    }
    // Export with an identity swizzle (X,Y,Z,W).
    case AMDGPUIntrinsic::R600_store_swizzle: {
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, MVT::i32), // SWZ_X
        DAG.getConstant(1, MVT::i32), // SWZ_Y
        DAG.getConstant(2, MVT::i32), // SWZ_Z
        DAG.getConstant(3, MVT::i32) // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(),
                         Args, 8);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch(IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    // Read a shader input: mark the physical register live-in and copy from
    // it at the function entry.
    case AMDGPUIntrinsic::R600_load_input: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      MRI.addLiveIn(Reg);
      return DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), Reg, VT);
    }

    case AMDGPUIntrinsic::R600_interp_input: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
      MachineSDNode *interp;
      // A negative barycentric index means a constant (flat) input: load the
      // whole vector and extract the channel for this slot.
      if (ijb < 0) {
        const MachineFunction &MF = DAG.getMachineFunction();
        const R600InstrInfo *TII =
          static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
        interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
            MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
        return DAG.getTargetExtractSubreg(
            TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
            DL, MVT::f32, SDValue(interp, 0));
      }

      // Otherwise interpolate with the I/J barycentrics, which arrive in a
      // fixed pair of live-in registers (2*ijb, 2*ijb+1).
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
      unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
      MRI.addLiveIn(RegisterI);
      MRI.addLiveIn(RegisterJ);
      SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
      SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);

      // INTERP_PAIR_* produce two channels at once; pick the node result
      // matching this slot's channel parity.
      if (slot % 4 < 2)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            RegisterJNode, RegisterINode);
      return SDValue(interp, slot % 2);
    }
    // All texture intrinsics share one TEXTURE_FETCH node; they differ only
    // in the TextureOp selector encoded as the first argument.
    case AMDGPUIntrinsic::R600_tex:
    case AMDGPUIntrinsic::R600_texc:
    case AMDGPUIntrinsic::R600_txl:
    case AMDGPUIntrinsic::R600_txlc:
    case AMDGPUIntrinsic::R600_txb:
    case AMDGPUIntrinsic::R600_txbc:
    case AMDGPUIntrinsic::R600_txf:
    case AMDGPUIntrinsic::R600_txq:
    case AMDGPUIntrinsic::R600_ddx:
    case AMDGPUIntrinsic::R600_ddy: {
      unsigned TextureOp;
      switch (IntrinsicID) {
      case AMDGPUIntrinsic::R600_tex:
        TextureOp = 0;
        break;
      case AMDGPUIntrinsic::R600_texc:
        TextureOp = 1;
        break;
      case AMDGPUIntrinsic::R600_txl:
        TextureOp = 2;
        break;
      case AMDGPUIntrinsic::R600_txlc:
        TextureOp = 3;
        break;
      case AMDGPUIntrinsic::R600_txb:
        TextureOp = 4;
        break;
      case AMDGPUIntrinsic::R600_txbc:
        TextureOp = 5;
        break;
      case AMDGPUIntrinsic::R600_txf:
        TextureOp = 6;
        break;
      case AMDGPUIntrinsic::R600_txq:
        TextureOp = 7;
        break;
      case AMDGPUIntrinsic::R600_ddx:
        TextureOp = 8;
        break;
      case AMDGPUIntrinsic::R600_ddy:
        TextureOp = 9;
        break;
      default:
        llvm_unreachable("Unknow Texture Operation");
      }

      // Operands: op, coord, identity source swizzle, resource/sampler ids,
      // coordinate types, and an identity destination swizzle.
      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
    }
    // dp4: expand both vectors into their four channels, interleaved as the
    // DOT4 node expects (a.x, b.x, a.y, b.y, ...).
    case AMDGPUIntrinsic::AMDGPU_dp4: {
      SDValue Args[8] = {
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(0, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(0, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(1, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(1, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(2, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(2, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(3, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(3, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8);
    }

    // Dispatch-size queries are implicit kernel parameters laid out as
    // consecutive dwords in the PARAM_I constant space.
    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    // Group and thread ids arrive pre-loaded in fixed T0/T1 channels.
    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}
702
703void R600TargetLowering::ReplaceNodeResults(SDNode *N,
704 SmallVectorImpl<SDValue> &Results,
705 SelectionDAG &DAG) const {
706 switch (N->getOpcode()) {
707 default: return;
708 case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
Tom Stellard365366f2013-01-23 02:09:06 +0000709 return;
710 case ISD::LOAD: {
711 SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
712 Results.push_back(SDValue(Node, 0));
713 Results.push_back(SDValue(Node, 1));
714 // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
715 // function
716 DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
717 return;
718 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000719 case ISD::STORE:
720 SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
721 Results.push_back(SDValue(Node, 0));
722 return;
Tom Stellard75aadc22012-12-11 21:25:42 +0000723 }
724}
725
726SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
727 return DAG.getNode(
728 ISD::SETCC,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000729 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +0000730 MVT::i1,
731 Op, DAG.getConstantFP(0.0f, MVT::f32),
732 DAG.getCondCode(ISD::SETNE)
733 );
734}
735
Tom Stellard75aadc22012-12-11 21:25:42 +0000736SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000737 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +0000738 unsigned DwordOffset) const {
739 unsigned ByteOffset = DwordOffset * 4;
740 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
741 AMDGPUAS::PARAM_I_ADDRESS);
742
743 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
744 assert(isInt<16>(ByteOffset));
745
746 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
747 DAG.getConstant(ByteOffset, MVT::i32), // PTR
748 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
749 false, false, false, 0);
750}
751
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000752SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
753
754 MachineFunction &MF = DAG.getMachineFunction();
755 const AMDGPUFrameLowering *TFL =
756 static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
757
758 FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
759 assert(FIN);
760
761 unsigned FrameIndex = FIN->getIndex();
762 unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
763 return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
764}
765
Tom Stellard75aadc22012-12-11 21:25:42 +0000766bool R600TargetLowering::isZero(SDValue Op) const {
767 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
768 return Cst->isNullValue();
769 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
770 return CstFP->isZero();
771 } else {
772 return false;
773 }
774}
775
/// Lower SELECT_CC to a form matchable by a native SET* or CND* instruction
/// when the operands allow it; otherwise expand it into two SELECT_CC nodes
/// (one producing a hardware boolean, one selecting on it).
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_any
  // select_cc f32, f32, 1.0f, 0.0f, cc_any
  // select_cc i32, i32, -1, 0, cc_any
  //

  // Move hardware True/False values to the correct operand.  If they arrived
  // swapped, invert the condition instead of reordering the select.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    std::swap(False, True);
    CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_any
  // select_cc f32, 0.0, i32, i32, cc_any
  // select_cc i32, 0, f32, f32, cc_any
  // select_cc i32, 0, i32, i32, cc_any
  //
  if (isZero(LHS) || isZero(RHS)) {
    // Normalize so the zero sits on the RHS of the compare.
    SDValue Cond = (isZero(LHS) ? RHS : LHS);
    SDValue Zero = (isZero(LHS) ? LHS : RHS);
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }
    if (isZero(LHS)) {
      // The zero was on the LHS; swapping compare operands requires the
      // swapped form of the condition code.
      CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
    }

    // Rewrite the "not equal" / "less than" family of conditions by
    // inverting the condition code and exchanging True/False, leaving only
    // the conditions the CND* patterns below can select.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
    case ISD::SETULE:
    case ISD::SETULT:
    case ISD::SETOLE:
    case ISD::SETOLT:
    case ISD::SETLE:
    case ISD::SETLT:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    // Cast the result back to the requested type (nop when CompareVT == VT).
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }


  // Possible Min/Max pattern
  SDValue MinMax = LowerMinMax(Op, DAG);
  if (MinMax.getNode()) {
    return MinMax;
  }

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  }
  else {
    // NOTE(review): in NDEBUG builds this assert compiles away and
    // HWTrue/HWFalse stay empty SDValues — only f32/i32 compares reach here.
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
895
896SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
897 return DAG.getNode(ISD::SELECT_CC,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000898 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +0000899 Op.getValueType(),
900 Op.getOperand(0),
901 DAG.getConstant(0, MVT::i32),
902 Op.getOperand(1),
903 Op.getOperand(2),
904 DAG.getCondCode(ISD::SETNE));
905}
906
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000907/// LLVM generates byte-addresed pointers. For indirect addressing, we need to
908/// convert these pointers to a register index. Each register holds
909/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
910/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
911/// for indirect addressing.
912SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
913 unsigned StackWidth,
914 SelectionDAG &DAG) const {
915 unsigned SRLPad;
916 switch(StackWidth) {
917 case 1:
918 SRLPad = 2;
919 break;
920 case 2:
921 SRLPad = 3;
922 break;
923 case 4:
924 SRLPad = 4;
925 break;
926 default: llvm_unreachable("Invalid stack width");
927 }
928
Andrew Trickef9de2a2013-05-25 02:42:55 +0000929 return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000930 DAG.getConstant(SRLPad, MVT::i32));
931}
932
933void R600TargetLowering::getStackAddress(unsigned StackWidth,
934 unsigned ElemIdx,
935 unsigned &Channel,
936 unsigned &PtrIncr) const {
937 switch (StackWidth) {
938 default:
939 case 1:
940 Channel = 0;
941 if (ElemIdx > 0) {
942 PtrIncr = 1;
943 } else {
944 PtrIncr = 0;
945 }
946 break;
947 case 2:
948 Channel = ElemIdx % 2;
949 if (ElemIdx == 2) {
950 PtrIncr = 1;
951 } else {
952 PtrIncr = 0;
953 }
954 break;
955 case 4:
956 Channel = ElemIdx;
957 PtrIncr = 0;
958 break;
959 }
960}
961
Tom Stellard75aadc22012-12-11 21:25:42 +0000962SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +0000963 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000964 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
965 SDValue Chain = Op.getOperand(0);
966 SDValue Value = Op.getOperand(1);
967 SDValue Ptr = Op.getOperand(2);
968
969 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
970 Ptr->getOpcode() != AMDGPUISD::DWORDADDR) {
971 // Convert pointer from byte address to dword address.
972 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
973 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
974 Ptr, DAG.getConstant(2, MVT::i32)));
975
976 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
977 assert(!"Truncated and indexed stores not supported yet");
978 } else {
979 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
980 }
981 return Chain;
982 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000983
984 EVT ValueVT = Value.getValueType();
985
986 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
987 return SDValue();
988 }
989
990 // Lowering for indirect addressing
991
992 const MachineFunction &MF = DAG.getMachineFunction();
993 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
994 getTargetMachine().getFrameLowering());
995 unsigned StackWidth = TFL->getStackWidth(MF);
996
997 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
998
999 if (ValueVT.isVector()) {
1000 unsigned NumElemVT = ValueVT.getVectorNumElements();
1001 EVT ElemVT = ValueVT.getVectorElementType();
1002 SDValue Stores[4];
1003
1004 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1005 "vector width in load");
1006
1007 for (unsigned i = 0; i < NumElemVT; ++i) {
1008 unsigned Channel, PtrIncr;
1009 getStackAddress(StackWidth, i, Channel, PtrIncr);
1010 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1011 DAG.getConstant(PtrIncr, MVT::i32));
1012 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
1013 Value, DAG.getConstant(i, MVT::i32));
1014
1015 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1016 Chain, Elem, Ptr,
1017 DAG.getTargetConstant(Channel, MVT::i32));
1018 }
1019 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
1020 } else {
1021 if (ValueVT == MVT::i8) {
1022 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1023 }
1024 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
NAKAMURA Takumi18ca09c2013-05-22 06:37:25 +00001025 DAG.getTargetConstant(0, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001026 }
1027
1028 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001029}
1030
Tom Stellard365366f2013-01-23 02:09:06 +00001031// return (512 + (kc_bank << 12)
1032static int
1033ConstantAddressBlock(unsigned AddressSpace) {
1034 switch (AddressSpace) {
1035 case AMDGPUAS::CONSTANT_BUFFER_0:
1036 return 512;
1037 case AMDGPUAS::CONSTANT_BUFFER_1:
1038 return 512 + 4096;
1039 case AMDGPUAS::CONSTANT_BUFFER_2:
1040 return 512 + 4096 * 2;
1041 case AMDGPUAS::CONSTANT_BUFFER_3:
1042 return 512 + 4096 * 3;
1043 case AMDGPUAS::CONSTANT_BUFFER_4:
1044 return 512 + 4096 * 4;
1045 case AMDGPUAS::CONSTANT_BUFFER_5:
1046 return 512 + 4096 * 5;
1047 case AMDGPUAS::CONSTANT_BUFFER_6:
1048 return 512 + 4096 * 6;
1049 case AMDGPUAS::CONSTANT_BUFFER_7:
1050 return 512 + 4096 * 7;
1051 case AMDGPUAS::CONSTANT_BUFFER_8:
1052 return 512 + 4096 * 8;
1053 case AMDGPUAS::CONSTANT_BUFFER_9:
1054 return 512 + 4096 * 9;
1055 case AMDGPUAS::CONSTANT_BUFFER_10:
1056 return 512 + 4096 * 10;
1057 case AMDGPUAS::CONSTANT_BUFFER_11:
1058 return 512 + 4096 * 11;
1059 case AMDGPUAS::CONSTANT_BUFFER_12:
1060 return 512 + 4096 * 12;
1061 case AMDGPUAS::CONSTANT_BUFFER_13:
1062 return 512 + 4096 * 13;
1063 case AMDGPUAS::CONSTANT_BUFFER_14:
1064 return 512 + 4096 * 14;
1065 case AMDGPUAS::CONSTANT_BUFFER_15:
1066 return 512 + 4096 * 15;
1067 default:
1068 return -1;
1069 }
1070}
1071
1072SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
1073{
1074 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001075 SDLoc DL(Op);
Tom Stellard365366f2013-01-23 02:09:06 +00001076 LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
1077 SDValue Chain = Op.getOperand(0);
1078 SDValue Ptr = Op.getOperand(1);
1079 SDValue LoweredLoad;
1080
1081 int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
1082 if (ConstantBlock > -1) {
1083 SDValue Result;
1084 if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
Vincent Lejeune743dca02013-03-05 15:04:29 +00001085 dyn_cast<Constant>(LoadNode->getSrcValue()) ||
1086 dyn_cast<ConstantSDNode>(Ptr)) {
Tom Stellard365366f2013-01-23 02:09:06 +00001087 SDValue Slots[4];
1088 for (unsigned i = 0; i < 4; i++) {
1089 // We want Const position encoded with the following formula :
1090 // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
1091 // const_index is Ptr computed by llvm using an alignment of 16.
1092 // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
1093 // then div by 4 at the ISel step
1094 SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
1095 DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
1096 Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
1097 }
1098 Result = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Slots, 4);
1099 } else {
1100 // non constant ptr cant be folded, keeps it as a v4f32 load
1101 Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
Vincent Lejeune743dca02013-03-05 15:04:29 +00001102 DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
Christian Konig189357c2013-03-07 09:03:59 +00001103 DAG.getConstant(LoadNode->getAddressSpace() -
NAKAMURA Takumi18ca09c2013-05-22 06:37:25 +00001104 AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
Tom Stellard365366f2013-01-23 02:09:06 +00001105 );
1106 }
1107
1108 if (!VT.isVector()) {
1109 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
1110 DAG.getConstant(0, MVT::i32));
1111 }
1112
1113 SDValue MergedValues[2] = {
1114 Result,
1115 Chain
1116 };
1117 return DAG.getMergeValues(MergedValues, 2, DL);
1118 }
1119
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001120 if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1121 return SDValue();
1122 }
1123
1124 // Lowering for indirect addressing
1125 const MachineFunction &MF = DAG.getMachineFunction();
1126 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1127 getTargetMachine().getFrameLowering());
1128 unsigned StackWidth = TFL->getStackWidth(MF);
1129
1130 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1131
1132 if (VT.isVector()) {
1133 unsigned NumElemVT = VT.getVectorNumElements();
1134 EVT ElemVT = VT.getVectorElementType();
1135 SDValue Loads[4];
1136
1137 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1138 "vector width in load");
1139
1140 for (unsigned i = 0; i < NumElemVT; ++i) {
1141 unsigned Channel, PtrIncr;
1142 getStackAddress(StackWidth, i, Channel, PtrIncr);
1143 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1144 DAG.getConstant(PtrIncr, MVT::i32));
1145 Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
1146 Chain, Ptr,
1147 DAG.getTargetConstant(Channel, MVT::i32),
1148 Op.getOperand(2));
1149 }
1150 for (unsigned i = NumElemVT; i < 4; ++i) {
1151 Loads[i] = DAG.getUNDEF(ElemVT);
1152 }
1153 EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
1154 LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
1155 } else {
1156 LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
1157 Chain, Ptr,
1158 DAG.getTargetConstant(0, MVT::i32), // Channel
1159 Op.getOperand(2));
1160 }
1161
1162 SDValue Ops[2];
1163 Ops[0] = LoweredLoad;
1164 Ops[1] = Chain;
1165
1166 return DAG.getMergeValues(Ops, 2, DL);
Tom Stellard365366f2013-01-23 02:09:06 +00001167}
Tom Stellard75aadc22012-12-11 21:25:42 +00001168
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
///
/// Kernel arguments are not passed in registers: each one is zero-extend
/// loaded from the PARAM_I constant address space, starting at byte offset
/// 36 (presumably reserved for dispatch metadata -- TODO confirm) and
/// advancing by the in-memory size of each preceding argument.
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      SDLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  unsigned ParamOffsetBytes = 36;
  Function::const_arg_iterator FuncArg =
      DAG.getMachineFunction().getFunction()->arg_begin();
  for (unsigned i = 0, e = Ins.size(); i < e; ++i, ++FuncArg) {
    EVT VT = Ins[i].VT;
    // Pointers are treated as 32 bits here; everything else uses its
    // primitive size from the IR type.
    Type *ArgType = FuncArg->getType();
    unsigned ArgSizeInBits = ArgType->isPointerTy() ?
                             32 : ArgType->getPrimitiveSizeInBits();
    unsigned ArgBytes = ArgSizeInBits >> 3;
    EVT ArgVT;
    if (ArgSizeInBits < VT.getSizeInBits()) {
      // The IR argument is narrower than the legalized type: load the
      // narrow integer type and let the ZEXTLOAD below widen it.
      assert(!ArgType->isFloatTy() &&
             "Extending floating point arguments not supported yet");
      ArgVT = MVT::getIntegerVT(ArgSizeInBits);
    } else {
      ArgVT = VT;
    }
    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::PARAM_I_ADDRESS);
    SDValue Arg = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getRoot(),
                                 DAG.getConstant(ParamOffsetBytes, MVT::i32),
                                 MachinePointerInfo(UndefValue::get(PtrTy)),
                                 ArgVT, false, false, ArgBytes);
    InVals.push_back(Arg);
    // NOTE(review): offsets are packed with no per-argument alignment;
    // confirm this matches the runtime's parameter-buffer layout.
    ParamOffsetBytes += ArgBytes;
  }
  return Chain;
}
1207
Matt Arsenault758659232013-05-18 00:21:46 +00001208EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Tom Stellard75aadc22012-12-11 21:25:42 +00001209 if (!VT.isVector()) return MVT::i32;
1210 return VT.changeVectorElementTypeToInteger();
1211}
1212
Benjamin Kramer193960c2013-06-11 13:32:25 +00001213static SDValue
1214CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
1215 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001216 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1217 assert(RemapSwizzle.empty());
1218 SDValue NewBldVec[4] = {
1219 VectorEntry.getOperand(0),
1220 VectorEntry.getOperand(1),
1221 VectorEntry.getOperand(2),
1222 VectorEntry.getOperand(3)
1223 };
1224
1225 for (unsigned i = 0; i < 4; i++) {
1226 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1227 if (C->isZero()) {
1228 RemapSwizzle[i] = 4; // SEL_0
1229 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1230 } else if (C->isExactlyValue(1.0)) {
1231 RemapSwizzle[i] = 5; // SEL_1
1232 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1233 }
1234 }
1235
1236 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1237 continue;
1238 for (unsigned j = 0; j < i; j++) {
1239 if (NewBldVec[i] == NewBldVec[j]) {
1240 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1241 RemapSwizzle[i] = j;
1242 break;
1243 }
1244 }
1245 }
1246
1247 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
1248 VectorEntry.getValueType(), NewBldVec, 4);
1249}
1250
Benjamin Kramer193960c2013-06-11 13:32:25 +00001251static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1252 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001253 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1254 assert(RemapSwizzle.empty());
1255 SDValue NewBldVec[4] = {
1256 VectorEntry.getOperand(0),
1257 VectorEntry.getOperand(1),
1258 VectorEntry.getOperand(2),
1259 VectorEntry.getOperand(3)
1260 };
1261 bool isUnmovable[4] = { false, false, false, false };
1262
1263 for (unsigned i = 0; i < 4; i++) {
1264 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1265 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1266 ->getZExtValue();
1267 if (!isUnmovable[Idx]) {
1268 // Swap i and Idx
1269 std::swap(NewBldVec[Idx], NewBldVec[i]);
1270 RemapSwizzle[Idx] = i;
1271 RemapSwizzle[i] = Idx;
1272 }
1273 isUnmovable[Idx] = true;
1274 }
1275 }
1276
1277 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
1278 VectorEntry.getValueType(), NewBldVec, 4);
1279}
1280
1281
1282SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1283SDValue Swz[4], SelectionDAG &DAG) const {
1284 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1285 // Old -> New swizzle values
1286 DenseMap<unsigned, unsigned> SwizzleRemap;
1287
1288 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1289 for (unsigned i = 0; i < 4; i++) {
1290 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1291 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1292 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1293 }
1294
1295 SwizzleRemap.clear();
1296 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1297 for (unsigned i = 0; i < 4; i++) {
1298 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1299 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1300 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1301 }
1302
1303 return BuildVector;
1304}
1305
1306
Tom Stellard75aadc22012-12-11 21:25:42 +00001307//===----------------------------------------------------------------------===//
1308// Custom DAG Optimizations
1309//===----------------------------------------------------------------------===//
1310
1311SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1312 DAGCombinerInfo &DCI) const {
1313 SelectionDAG &DAG = DCI.DAG;
1314
1315 switch (N->getOpcode()) {
1316 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1317 case ISD::FP_ROUND: {
1318 SDValue Arg = N->getOperand(0);
1319 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001320 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001321 Arg.getOperand(0));
1322 }
1323 break;
1324 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001325
1326 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1327 // (i32 select_cc f32, f32, -1, 0 cc)
1328 //
1329 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1330 // this to one of the SET*_DX10 instructions.
1331 case ISD::FP_TO_SINT: {
1332 SDValue FNeg = N->getOperand(0);
1333 if (FNeg.getOpcode() != ISD::FNEG) {
1334 return SDValue();
1335 }
1336 SDValue SelectCC = FNeg.getOperand(0);
1337 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1338 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1339 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1340 !isHWTrueValue(SelectCC.getOperand(2)) ||
1341 !isHWFalseValue(SelectCC.getOperand(3))) {
1342 return SDValue();
1343 }
1344
Andrew Trickef9de2a2013-05-25 02:42:55 +00001345 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001346 SelectCC.getOperand(0), // LHS
1347 SelectCC.getOperand(1), // RHS
1348 DAG.getConstant(-1, MVT::i32), // True
1349 DAG.getConstant(0, MVT::i32), // Flase
1350 SelectCC.getOperand(4)); // CC
1351
1352 break;
1353 }
Tom Stellard365366f2013-01-23 02:09:06 +00001354 // Extract_vec (Build_vector) generated by custom lowering
1355 // also needs to be customly combined
1356 case ISD::EXTRACT_VECTOR_ELT: {
1357 SDValue Arg = N->getOperand(0);
1358 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1359 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1360 unsigned Element = Const->getZExtValue();
1361 return Arg->getOperand(Element);
1362 }
1363 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001364 if (Arg.getOpcode() == ISD::BITCAST &&
1365 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1366 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1367 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001368 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00001369 Arg->getOperand(0).getOperand(Element));
1370 }
1371 }
Tom Stellard365366f2013-01-23 02:09:06 +00001372 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001373
1374 case ISD::SELECT_CC: {
1375 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1376 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001377 //
1378 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1379 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001380 SDValue LHS = N->getOperand(0);
1381 if (LHS.getOpcode() != ISD::SELECT_CC) {
1382 return SDValue();
1383 }
1384
1385 SDValue RHS = N->getOperand(1);
1386 SDValue True = N->getOperand(2);
1387 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001388 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001389
1390 if (LHS.getOperand(2).getNode() != True.getNode() ||
1391 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00001392 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00001393 return SDValue();
1394 }
1395
Tom Stellard5e524892013-03-08 15:37:11 +00001396 switch (NCC) {
1397 default: return SDValue();
1398 case ISD::SETNE: return LHS;
1399 case ISD::SETEQ: {
1400 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1401 LHSCC = ISD::getSetCCInverse(LHSCC,
1402 LHS.getOperand(0).getValueType().isInteger());
Andrew Trickef9de2a2013-05-25 02:42:55 +00001403 return DAG.getSelectCC(SDLoc(N),
Tom Stellard5e524892013-03-08 15:37:11 +00001404 LHS.getOperand(0),
1405 LHS.getOperand(1),
1406 LHS.getOperand(2),
1407 LHS.getOperand(3),
1408 LHSCC);
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001409 }
Tom Stellard5e524892013-03-08 15:37:11 +00001410 }
1411 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001412 case AMDGPUISD::EXPORT: {
1413 SDValue Arg = N->getOperand(1);
1414 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1415 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001416
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001417 SDValue NewArgs[8] = {
1418 N->getOperand(0), // Chain
1419 SDValue(),
1420 N->getOperand(2), // ArrayBase
1421 N->getOperand(3), // Type
1422 N->getOperand(4), // SWZ_X
1423 N->getOperand(5), // SWZ_Y
1424 N->getOperand(6), // SWZ_Z
1425 N->getOperand(7) // SWZ_W
1426 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00001427 SDLoc DL(N);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001428 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001429 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
Tom Stellarde06163a2013-02-07 14:02:35 +00001430 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001431 case AMDGPUISD::TEXTURE_FETCH: {
1432 SDValue Arg = N->getOperand(1);
1433 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1434 break;
1435
1436 SDValue NewArgs[19] = {
1437 N->getOperand(0),
1438 N->getOperand(1),
1439 N->getOperand(2),
1440 N->getOperand(3),
1441 N->getOperand(4),
1442 N->getOperand(5),
1443 N->getOperand(6),
1444 N->getOperand(7),
1445 N->getOperand(8),
1446 N->getOperand(9),
1447 N->getOperand(10),
1448 N->getOperand(11),
1449 N->getOperand(12),
1450 N->getOperand(13),
1451 N->getOperand(14),
1452 N->getOperand(15),
1453 N->getOperand(16),
1454 N->getOperand(17),
1455 N->getOperand(18),
1456 };
1457 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
1458 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
1459 NewArgs, 19);
1460 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001461 }
1462 return SDValue();
1463}