//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation ----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Function.h"

using namespace llvm;

R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo())) {
  setOperationAction(ISD::MUL, MVT::i64, Expand);
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  computeRegisterProperties();

  setOperationAction(ISD::FADD, MVT::v4f32, Expand);
  setOperationAction(ISD::FMUL, MVT::v4f32, Expand);
  setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
  setOperationAction(ISD::FSUB, MVT::v4f32, Expand);

  setOperationAction(ISD::ADD, MVT::v4i32, Expand);
  setOperationAction(ISD::AND, MVT::v4i32, Expand);
  setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
  setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i32, Expand);
  setOperationAction(ISD::UREM, MVT::v4i32, Expand);
  setOperationAction(ISD::SETCC, MVT::v4i32, Expand);

  setOperationAction(ISD::BR_CC, MVT::i32, Custom);
  setOperationAction(ISD::BR_CC, MVT::f32, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
  setOperationAction(ISD::FPOW, MVT::f32, Custom);

  setOperationAction(ISD::ROTL, MVT::i32, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Custom);
  setOperationAction(ISD::SETCC, MVT::f32, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);

  setOperationAction(ISD::SELECT, MVT::i32, Custom);
  setOperationAction(ISD::SELECT, MVT::f32, Custom);

  setOperationAction(ISD::STORE, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);

  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);

  setSchedulingPreference(Sched::VLIW);
}

MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr *MI, MachineBasicBlock *BB) const {
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;

  switch (MI->getOpcode()) {
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case AMDGPU::SHADER_TYPE: break;
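  // CLAMP, FABS, and FNEG are folded into a plain MOV by setting the
  // corresponding R600 instruction modifier flag on it.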
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr *defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;

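  // Cacheless global stores through the RAT (random access target). A store
  // immediately followed by RETURN also carries the end-of-program bit.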
  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
        .addOperand(MI->getOperand(3))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5))
        .addOperand(MI->getOperand(6));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
        .addOperand(MI->getOperand(2))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5))
        .addOperand(MI->getOperand(6));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5))
        .addOperand(MI->getOperand(6))
        .addReg(T0, RegState::Implicit)
        .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
        .addOperand(MI->getOperand(3))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5))
        .addOperand(MI->getOperand(6));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
        .addOperand(MI->getOperand(2))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5))
        .addOperand(MI->getOperand(6));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5))
        .addOperand(MI->getOperand(6))
        .addReg(T0, RegState::Implicit)
        .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
        .addOperand(MI->getOperand(0))
        .addReg(0);
    break;

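  // Conditional branches become a PRED_X that compares the condition against
  // zero and defines PREDICATE_BIT, followed by a JUMP predicated on that bit.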
  case AMDGPU::BRANCH_COND_f32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
        .addOperand(MI->getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO_INT)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
        .addOperand(MI->getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // The instruction is left unmodified if it's not the last export of its
    // type in the block.
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = llvm::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
                                             .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN);
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz) ? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addOperand(MI->getOperand(2))
        .addOperand(MI->getOperand(3))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5))
        .addOperand(MI->getOperand(6))
        .addImm(CfInst)
        .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  MI->eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

using namespace llvm::Intrinsic;
using namespace llvm::AMDGPUIntrinsic;

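// Build or grow the EXPORT node for a given output slot. The first scalar
// written to a slot creates a fresh EXPORT of an undef vector with a
// one-channel write mask; later scalars for the same slot are merged into
// that node by inserting into its vector operand and widening its mask.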
static SDValue
InsertScalarToRegisterExport(SelectionDAG &DAG, DebugLoc DL, SDNode **ExportMap,
    unsigned Slot, unsigned Channel, unsigned Inst, unsigned Type,
    SDValue Scalar, SDValue Chain) {
  if (!ExportMap[Slot]) {
    SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT,
        DL, MVT::v4f32,
        DAG.getUNDEF(MVT::v4f32),
        Scalar,
        DAG.getConstant(Channel, MVT::i32));

    unsigned Mask = 1 << Channel;

    const SDValue Ops[] = {Chain, Vector, DAG.getConstant(Inst, MVT::i32),
        DAG.getConstant(Type, MVT::i32), DAG.getConstant(Slot, MVT::i32),
        DAG.getConstant(Mask, MVT::i32)};

    SDValue Res = DAG.getNode(
        AMDGPUISD::EXPORT,
        DL,
        MVT::Other,
        Ops, 6);
    ExportMap[Slot] = Res.getNode();
    return Res;
  }

  SDNode *ExportInstruction = ExportMap[Slot];
  SDValue PreviousVector = ExportInstruction->getOperand(1);
  SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT,
      DL, MVT::v4f32,
      PreviousVector,
      Scalar,
      DAG.getConstant(Channel, MVT::i32));

  unsigned Mask = cast<ConstantSDNode>(ExportInstruction->getOperand(5))
                      ->getZExtValue();
  Mask |= (1 << Channel);

  const SDValue Ops[] = {ExportInstruction->getOperand(0), Vector,
      DAG.getConstant(Inst, MVT::i32),
      DAG.getConstant(Type, MVT::i32),
      DAG.getConstant(Slot, MVT::i32),
      DAG.getConstant(Mask, MVT::i32)};

  DAG.UpdateNodeOperands(ExportInstruction,
      Ops, 6);

  return Chain;
}

SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::BR_CC: return LowerBR_CC(Op, DAG);
  case ISD::ROTL: return LowerROTL(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::SELECT: return LowerSELECT(Op, DAG);
  case ISD::SETCC: return LowerSETCC(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: return LowerLOAD(Op, DAG);
  case ISD::FPOW: return LowerFPOW(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      MachineFunction &MF = DAG.getMachineFunction();
      R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MFI->LiveOuts.push_back(Reg);
      return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
    }
    case AMDGPUIntrinsic::R600_store_pixel_color: {
      MachineFunction &MF = DAG.getMachineFunction();
      R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();

      SDNode **OutputsMap = MFI->Outputs;
      return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap,
          RegIndex / 4, RegIndex % 4, 0, 0, Op.getOperand(2),
          Chain);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    DebugLoc DL = Op.getDebugLoc();
    switch (IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
    }

    case AMDGPUIntrinsic::R600_interp_input: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
      MachineSDNode *interp;
      if (ijb < 0) {
        interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
            MVT::v4f32, DAG.getTargetConstant(slot / 4, MVT::i32));
        return DAG.getTargetExtractSubreg(
            TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
            DL, MVT::f32, SDValue(interp, 0));
      }

      if (slot % 4 < 2)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4, MVT::i32),
            CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1), MVT::f32),
            CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb), MVT::f32));
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4, MVT::i32),
            CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1), MVT::f32),
            CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb), MVT::f32));

      return SDValue(interp, slot % 2);
    }

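    // The implicit parameters below are laid out as nine consecutive dwords
    // at the start of the parameter buffer (see LowerImplicitParameter).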
    case r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    case r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default: return;
  case ISD::FP_TO_UINT:
    Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
    return;
  case ISD::LOAD: {
    SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
    Results.push_back(SDValue(Node, 0));
    Results.push_back(SDValue(Node, 1));
    // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
    // function
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(Node, 1));
    return;
  }
  }
}

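// An FP_TO_UINT that produces i1 only needs to distinguish zero from
// non-zero, so it reduces to a single setne compare against 0.0f.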
SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
  return DAG.getNode(
      ISD::SETCC,
      Op.getDebugLoc(),
      MVT::i1,
      Op, DAG.getConstantFP(0.0f, MVT::f32),
      DAG.getCondCode(ISD::SETNE)
      );
}

SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue CC = Op.getOperand(1);
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue JumpT = Op.getOperand(4);
  SDValue CmpValue;
  SDValue Result;

  if (LHS.getValueType() == MVT::i32) {
    CmpValue = DAG.getNode(
        ISD::SELECT_CC,
        Op.getDebugLoc(),
        MVT::i32,
        LHS, RHS,
        DAG.getConstant(-1, MVT::i32),
        DAG.getConstant(0, MVT::i32),
        CC);
  } else if (LHS.getValueType() == MVT::f32) {
    CmpValue = DAG.getNode(
        ISD::SELECT_CC,
        Op.getDebugLoc(),
        MVT::f32,
        LHS, RHS,
        DAG.getConstantFP(1.0f, MVT::f32),
        DAG.getConstantFP(0.0f, MVT::f32),
        CC);
  } else {
    assert(0 && "Not valid type for br_cc");
  }
  Result = DAG.getNode(
      AMDGPUISD::BRANCH_COND,
      CmpValue.getDebugLoc(),
      MVT::Other, Chain,
      JumpT, CmpValue);
  return Result;
}

SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   DebugLoc DL,
                                                   unsigned DwordOffset) const {
  unsigned ByteOffset = DwordOffset * 4;
  PointerType *PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::PARAM_I_ADDRESS);

  // We shouldn't be using an offset wider than 16 bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)),
                     false, false, false, 0);
}

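// Express ROTL through BITALIGN: rotating x left by n bits is the same as
// bitaligning x with itself by (32 - n).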
SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
                     Op.getOperand(0),
                     Op.getOperand(0),
                     DAG.getNode(ISD::SUB, DL, VT,
                                 DAG.getConstant(32, MVT::i32),
                                 Op.getOperand(1)));
}

bool R600TargetLowering::isZero(SDValue Op) const {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
    return Cst->isNullValue();
  } else if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CstFP->isZero();
  } else {
    return false;
  }
}

SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a CND* instruction:
  // CND* instructions require RHS to be zero.  Some SELECT_CC nodes that
  // can be lowered to CND* instructions can also be lowered to SET*
  // instructions.  CND* instructions are cheaper, because they don't
  // require additional instructions to convert their result to the correct
  // value type, so this check should be first.
  if (isZero(LHS) || isZero(RHS)) {
    SDValue Cond = (isZero(LHS) ? RHS : LHS);
    SDValue Zero = (isZero(LHS) ? LHS : RHS);
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types.  This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }
    if (isZero(LHS)) {
      CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
    }

    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
    case ISD::SETULE:
    case ISD::SETULT:
    case ISD::SETOLE:
    case ISD::SETOLT:
    case ISD::SETLE:
    case ISD::SETLT:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // Try to lower to a SET* instruction:
  // We need all the operands of SELECT_CC to have the same value type, so if
  // necessary we need to change True and False to be the same type as LHS and
  // RHS, and then convert the result of the select_cc back to the correct type.

  // Move hardware True/False values to the correct operand.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    std::swap(False, True);
    CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
  }

  if (isHWTrueValue(True) && isHWFalseValue(False)) {
    if (CompareVT != VT) {
      if (VT == MVT::f32 && CompareVT == MVT::i32) {
        SDValue Boolean = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
            LHS, RHS,
            DAG.getConstant(-1, MVT::i32),
            DAG.getConstant(0, MVT::i32),
            CC);
        // Convert integer values of true (-1) and false (0) to fp values of
        // true (1.0f) and false (0.0f).
        SDValue LSB = DAG.getNode(ISD::AND, DL, MVT::i32, Boolean,
                                  DAG.getConstant(1, MVT::i32));
        return DAG.getNode(ISD::UINT_TO_FP, DL, VT, LSB);
      } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
        SDValue BoolAsFlt = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
            LHS, RHS,
            DAG.getConstantFP(1.0f, MVT::f32),
            DAG.getConstantFP(0.0f, MVT::f32),
            CC);
        // Convert fp values of true (1.0f) and false (0.0f) to integer values
        // of true (-1) and false (0).
        SDValue Neg = DAG.getNode(ISD::FNEG, DL, MVT::f32, BoolAsFlt);
        return DAG.getNode(ISD::FP_TO_SINT, DL, VT, Neg);
      } else {
        // I don't think there will be any other type pairings.
        assert(!"Unhandled operand type pairings in SELECT_CC");
      }
    } else {
      // This SELECT_CC is already legal.
      return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
    }
  }

  // Possible Min/Max pattern
  SDValue MinMax = LowerMinMax(Op, DAG);
  if (MinMax.getNode()) {
    return MinMax;
  }

  // If we make it this far, it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  } else {
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS,
                             HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
                     Cond, HWFalse,
                     True, False,
                     DAG.getCondCode(ISD::SETNE));
}

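// SELECT is SELECT_CC with an implicit "condition != 0" comparison.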
SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  return DAG.getNode(ISD::SELECT_CC,
      Op.getDebugLoc(),
      Op.getValueType(),
      Op.getOperand(0),
      DAG.getConstant(0, MVT::i32),
      Op.getOperand(1),
      Op.getOperand(2),
      DAG.getCondCode(ISD::SETNE));
}

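// SETCC must produce 0 or 1 in an i32. Build the compare as a SELECT_CC
// yielding -1/0 (integer) or 1.0f/0.0f (fp, then converted to integer), and
// AND the result with 1 to normalize it.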
SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  SDValue Cond;
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue CC = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  assert(Op.getValueType() == MVT::i32);
  if (LHS.getValueType() == MVT::i32) {
    Cond = DAG.getNode(
        ISD::SELECT_CC,
        Op.getDebugLoc(),
        MVT::i32,
        LHS, RHS,
        DAG.getConstant(-1, MVT::i32),
        DAG.getConstant(0, MVT::i32),
        CC);
  } else if (LHS.getValueType() == MVT::f32) {
    Cond = DAG.getNode(
        ISD::SELECT_CC,
        Op.getDebugLoc(),
        MVT::f32,
        LHS, RHS,
        DAG.getConstantFP(1.0f, MVT::f32),
        DAG.getConstantFP(0.0f, MVT::f32),
        CC);
    Cond = DAG.getNode(
        ISD::FP_TO_SINT,
        DL,
        MVT::i32,
        Cond);
  } else {
    assert(0 && "Not valid type for set_cc");
  }
  Cond = DAG.getNode(
      ISD::AND,
      DL,
      MVT::i32,
      DAG.getConstant(1, MVT::i32),
      Cond);
  return Cond;
}

SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Ptr = Op.getOperand(2);

  if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
      Ptr->getOpcode() != AMDGPUISD::DWORDADDR) {
    // Convert pointer from byte address to dword address.
    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
                      DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
                                  Ptr, DAG.getConstant(2, MVT::i32)));

    if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
      assert(!"Truncated and indexed stores not supported yet");
    } else {
      Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
    }
    return Chain;
  }
  return SDValue();
}

// Returns the constant-register base address for the given constant-buffer
// address space: 512 + (kc_bank << 12), or -1 if it is not one.
static int
ConstantAddressBlock(unsigned AddressSpace) {
  switch (AddressSpace) {
  case AMDGPUAS::CONSTANT_BUFFER_0:
    return 512;
  case AMDGPUAS::CONSTANT_BUFFER_1:
    return 512 + 4096;
  case AMDGPUAS::CONSTANT_BUFFER_2:
    return 512 + 4096 * 2;
  case AMDGPUAS::CONSTANT_BUFFER_3:
    return 512 + 4096 * 3;
  case AMDGPUAS::CONSTANT_BUFFER_4:
    return 512 + 4096 * 4;
  case AMDGPUAS::CONSTANT_BUFFER_5:
    return 512 + 4096 * 5;
  case AMDGPUAS::CONSTANT_BUFFER_6:
    return 512 + 4096 * 6;
  case AMDGPUAS::CONSTANT_BUFFER_7:
    return 512 + 4096 * 7;
  case AMDGPUAS::CONSTANT_BUFFER_8:
    return 512 + 4096 * 8;
  case AMDGPUAS::CONSTANT_BUFFER_9:
    return 512 + 4096 * 9;
  case AMDGPUAS::CONSTANT_BUFFER_10:
    return 512 + 4096 * 10;
  case AMDGPUAS::CONSTANT_BUFFER_11:
    return 512 + 4096 * 11;
  case AMDGPUAS::CONSTANT_BUFFER_12:
    return 512 + 4096 * 12;
  case AMDGPUAS::CONSTANT_BUFFER_13:
    return 512 + 4096 * 13;
  case AMDGPUAS::CONSTANT_BUFFER_14:
    return 512 + 4096 * 14;
  case AMDGPUAS::CONSTANT_BUFFER_15:
    return 512 + 4096 * 15;
  default:
    return -1;
  }
}

SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  DebugLoc DL = Op.getDebugLoc();
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1) {
    SDValue Result;
    if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
        dyn_cast<Constant>(LoadNode->getSrcValue())) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want the constant position encoded with the following formula:
        //   (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // where const_index is Ptr, computed by LLVM using an alignment of 16.
        // Thus we add ((512 + (kc_bank << 12)) + chan) * 4 here and then
        // divide by 4 at the ISel step.
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Slots, 4);
    } else {
      // A non-constant pointer can't be folded; keep it as a v4i32 load.
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
                      DAG.getConstant(4, MVT::i32)));
    }

    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  return SDValue();
}

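// Lower pow through the identity pow(x, y) == exp2(y * log2(x)).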
SDValue R600TargetLowering::LowerFPOW(SDValue Op,
                                      SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();
  SDValue LogBase = DAG.getNode(ISD::FLOG2, DL, VT, Op.getOperand(0));
  SDValue MulLogBase = DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), LogBase);
  return DAG.getNode(ISD::FEXP2, DL, VT, MulLogBase);
}

/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
SDValue R600TargetLowering::LowerFormalArguments(
    SDValue Chain,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc DL, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const {
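  // Kernel arguments start 36 bytes into the parameter buffer, just past the
  // nine implicit dword parameters read by LowerImplicitParameter.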
  unsigned ParamOffsetBytes = 36;
  Function::const_arg_iterator FuncArg =
      DAG.getMachineFunction().getFunction()->arg_begin();
  for (unsigned i = 0, e = Ins.size(); i < e; ++i, ++FuncArg) {
    EVT VT = Ins[i].VT;
    Type *ArgType = FuncArg->getType();
    unsigned ArgSizeInBits = ArgType->isPointerTy() ?
        32 : ArgType->getPrimitiveSizeInBits();
    unsigned ArgBytes = ArgSizeInBits >> 3;
    EVT ArgVT;
    if (ArgSizeInBits < VT.getSizeInBits()) {
      assert(!ArgType->isFloatTy() &&
             "Extending floating point arguments not supported yet");
      ArgVT = MVT::getIntegerVT(ArgSizeInBits);
    } else {
      ArgVT = VT;
    }
    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::PARAM_I_ADDRESS);
    SDValue Arg = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getRoot(),
                                 DAG.getConstant(ParamOffsetBytes, MVT::i32),
                                 MachinePointerInfo(new Argument(PtrTy)),
                                 ArgVT, false, false, ArgBytes);
    InVals.push_back(Arg);
    ParamOffsetBytes += ArgBytes;
  }
  return Chain;
}

EVT R600TargetLowering::getSetCCResultType(EVT VT) const {
  if (!VT.isVector()) return MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}

//===----------------------------------------------------------------------===//
// Custom DAG Optimizations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
  case ISD::FP_ROUND: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
      return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), N->getValueType(0),
                         Arg.getOperand(0));
    }
    break;
  }
  // Extract_vec (Build_vector) nodes generated by custom lowering also need
  // to be combined here.
  case ISD::EXTRACT_VECTOR_ELT: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return Arg->getOperand(Element);
      }
    }
    if (Arg.getOpcode() == ISD::BITCAST &&
        Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getVTList(),
                           Arg->getOperand(0).getOperand(Element));
      }
    }
  }
  }
  return SDValue();
}