blob: ad5a82fcfea16d626a8e67dc03d11ae28f61e604 [file] [log] [blame]
Tom Stellard75aadc22012-12-11 21:25:42 +00001//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Custom DAG lowering for R600
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000016#include "AMDILIntrinsicInfo.h"
17#include "AMDGPUFrameLowering.h"
18#include "AMDGPUSubtarget.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000019#include "R600Defines.h"
20#include "R600InstrInfo.h"
21#include "R600MachineFunctionInfo.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000022#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000023#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000024#include "llvm/CodeGen/MachineInstrBuilder.h"
25#include "llvm/CodeGen/MachineRegisterInfo.h"
26#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000027#include "llvm/IR/Argument.h"
28#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000029
30using namespace llvm;
31
32R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000033 AMDGPUTargetLowering(TM),
34 Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000035 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
36 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
37 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
38 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000039 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
40 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
41
Tom Stellard75aadc22012-12-11 21:25:42 +000042 computeRegisterProperties();
43
Tom Stellard0351ea22013-09-28 02:50:50 +000044 // Set condition code actions
45 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
46 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000047 setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000048 setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000049 setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
50 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000051 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
52 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
53 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
54 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000055 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
56 setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
57
58 setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
59 setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
60 setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
61 setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
62
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000063 setOperationAction(ISD::FCOS, MVT::f32, Custom);
64 setOperationAction(ISD::FSIN, MVT::f32, Custom);
65
Tom Stellard75aadc22012-12-11 21:25:42 +000066 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000067 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000068
Tom Stellard492ebea2013-03-08 15:37:07 +000069 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
70 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000071
72 setOperationAction(ISD::FSUB, MVT::f32, Expand);
73
74 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
75 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
76 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000077
Tom Stellard75aadc22012-12-11 21:25:42 +000078 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
79 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
80
Tom Stellarde8f9f282013-03-08 15:37:05 +000081 setOperationAction(ISD::SETCC, MVT::i32, Expand);
82 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000083 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
84
Tom Stellard53f2f902013-09-05 18:38:03 +000085 setOperationAction(ISD::SELECT, MVT::i32, Expand);
86 setOperationAction(ISD::SELECT, MVT::f32, Expand);
87 setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
Tom Stellard53f2f902013-09-05 18:38:03 +000088 setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000089
Matt Arsenault4e466652014-04-16 01:41:30 +000090 // Expand sign extension of vectors
91 if (!Subtarget->hasBFE())
92 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
93
94 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
95 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);
96
97 if (!Subtarget->hasBFE())
98 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
99 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
100 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);
101
102 if (!Subtarget->hasBFE())
103 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
104 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
105 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);
106
107 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
108 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
109 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);
110
111 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
112
113
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000114 // Legalize loads and stores to the private address space.
115 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000116 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000117 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000118
119 // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
120 // spaces, so it is custom lowered to handle those where it isn't.
Tom Stellard1e803092013-07-23 01:48:18 +0000121 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
122 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
123 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
124 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000125 setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
126 setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
127
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000128 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000129 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000130 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000131 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
Tom Stellardd3ee8c12013-08-16 01:12:06 +0000132 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
133 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000134
Tom Stellard365366f2013-01-23 02:09:06 +0000135 setOperationAction(ISD::LOAD, MVT::i32, Custom);
136 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000137 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
138
Tom Stellard880a80a2014-06-17 16:53:14 +0000139 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
140 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
141 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
142 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
143
144 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
145 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
146 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
147 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
148
Tom Stellard75aadc22012-12-11 21:25:42 +0000149 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +0000150 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +0000151 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +0000152 setTargetDAGCombine(ISD::SELECT_CC);
Quentin Colombete2e05482013-07-30 00:27:16 +0000153 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000154
Tom Stellard5f337882014-04-29 23:12:43 +0000155 // These should be replaced by UDVIREM, but it does not happen automatically
156 // during Type Legalization
157 setOperationAction(ISD::UDIV, MVT::i64, Custom);
158 setOperationAction(ISD::UREM, MVT::i64, Custom);
159
Jan Vesely25f36272014-06-18 12:27:13 +0000160 // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
161 // to be Legal/Custom in order to avoid library calls.
162 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
Jan Vesely900ff2e2014-06-18 12:27:15 +0000163 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
Jan Vesely25f36272014-06-18 12:27:13 +0000164
Michel Danzer49812b52013-07-10 16:37:07 +0000165 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
166
Tom Stellardb852af52013-03-08 15:37:03 +0000167 setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard87047f62013-04-24 23:56:18 +0000168 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
Tom Stellardfc455472013-08-12 22:33:21 +0000169 setSchedulingPreference(Sched::Source);
Tom Stellard75aadc22012-12-11 21:25:42 +0000170}
171
/// \brief Expand pseudo instructions that need custom MachineInstr insertion.
///
/// Invoked by the generic code emitter for every instruction marked with
/// usesCustomInserter. Expands the R600 pseudo \p MI in place inside \p BB
/// and returns the block where emission should continue. Unless a case
/// returns early, the original pseudo is erased at the end.
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
          static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // Replace LDS_*_RET instruction that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI->getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // If the returned value is actually used, keep the _RET form as-is.
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()))
        return BB;

      // Rebuild with the NORET opcode, dropping the dst operand (operand 0)
      // and copying the remaining operands verbatim.
      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  // CLAMP/FABS/FNEG are folded into a plain MOV carrying the matching
  // R600 output/source modifier flag.
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                   AMDGPU::MOV,
                                                   MI->getOperand(0).getReg(),
                                                   MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    // Instead of emitting anything, set the MASK flag on the instruction
    // that defines the masked register so its write is suppressed.
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    // The FP immediate is moved as its raw bit pattern.
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    // A constant-buffer read becomes a MOV from ALU_CONST with the buffer
    // selector stored in the src0_sel operand.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
        MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    // If the next instruction returns, this write is the last one of the
    // program and must carry the End-Of-Program bit.
    unsigned EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  // TXD: texture sample with explicit derivatives. Emits the horizontal and
  // vertical gradient setup instructions into two fresh 128-bit temporaries,
  // then the gradient sample itself, with swizzle/coordinate-type fields
  // adjusted per texture target.
  case AMDGPU::TXD: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    // Default: identity swizzle (X,Y,Z,W), all coordinates normalized.
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    // Adjust swizzle and coordinate-type fields for the texture target.
    // NOTE(review): the TextureId values presumably mirror the frontend's
    // texture-target encoding — confirm against the intrinsic producer.
    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    // Implicit uses of T0/T1 keep the gradient setup alive before the sample.
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  // Same expansion as TXD, but ends in the shadow-comparison gradient
  // sample (TEX_SAMPLE_C_G).
  case AMDGPU::TXD_SHADOW: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0));
    break;

  // Conditional branches expand into a PRED_X compare (with the PUSH flag,
  // feeding the predicate stack) followed by a predicated JUMP_COND.
  case AMDGPU::BRANCH_COND_f32: {
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO_INT)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if its not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    // Scan forward: if another export of the same type follows in this
    // block, that later one will carry the CF instruction instead.
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // CF instruction opcode differs between the Evergreen and R600 forms.
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  // The pseudo has been replaced by its expansion above; remove it.
  MI->eraseFromParent();
  return BB;
}
548
549//===----------------------------------------------------------------------===//
550// Custom DAG Lowering Operations
551//===----------------------------------------------------------------------===//
552
Tom Stellard75aadc22012-12-11 21:25:42 +0000553SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000554 MachineFunction &MF = DAG.getMachineFunction();
555 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard75aadc22012-12-11 21:25:42 +0000556 switch (Op.getOpcode()) {
557 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Tom Stellard880a80a2014-06-17 16:53:14 +0000558 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
559 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
Jan Vesely25f36272014-06-18 12:27:13 +0000560 case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
Jan Vesely900ff2e2014-06-18 12:27:15 +0000561 case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000562 case ISD::FCOS:
563 case ISD::FSIN: return LowerTrig(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000564 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000565 case ISD::STORE: return LowerSTORE(Op, DAG);
Tom Stellard365366f2013-01-23 02:09:06 +0000566 case ISD::LOAD: return LowerLOAD(Op, DAG);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000567 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000568 case ISD::INTRINSIC_VOID: {
569 SDValue Chain = Op.getOperand(0);
570 unsigned IntrinsicID =
571 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
572 switch (IntrinsicID) {
573 case AMDGPUIntrinsic::AMDGPU_store_output: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000574 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
575 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000576 MFI->LiveOuts.push_back(Reg);
Andrew Trickef9de2a2013-05-25 02:42:55 +0000577 return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
Tom Stellard75aadc22012-12-11 21:25:42 +0000578 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000579 case AMDGPUIntrinsic::R600_store_swizzle: {
580 const SDValue Args[8] = {
581 Chain,
582 Op.getOperand(2), // Export Value
583 Op.getOperand(3), // ArrayBase
584 Op.getOperand(4), // Type
585 DAG.getConstant(0, MVT::i32), // SWZ_X
586 DAG.getConstant(1, MVT::i32), // SWZ_Y
587 DAG.getConstant(2, MVT::i32), // SWZ_Z
588 DAG.getConstant(3, MVT::i32) // SWZ_W
589 };
Craig Topper48d114b2014-04-26 18:35:24 +0000590 return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(), Args);
Tom Stellard75aadc22012-12-11 21:25:42 +0000591 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000592
Tom Stellard75aadc22012-12-11 21:25:42 +0000593 // default for switch(IntrinsicID)
594 default: break;
595 }
596 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
597 break;
598 }
599 case ISD::INTRINSIC_WO_CHAIN: {
600 unsigned IntrinsicID =
601 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
602 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000603 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000604 switch(IntrinsicID) {
605 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000606 case AMDGPUIntrinsic::R600_load_input: {
607 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
608 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
609 MachineFunction &MF = DAG.getMachineFunction();
610 MachineRegisterInfo &MRI = MF.getRegInfo();
611 MRI.addLiveIn(Reg);
612 return DAG.getCopyFromReg(DAG.getEntryNode(),
613 SDLoc(DAG.getEntryNode()), Reg, VT);
614 }
615
616 case AMDGPUIntrinsic::R600_interp_input: {
617 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
618 int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
619 MachineSDNode *interp;
620 if (ijb < 0) {
621 const MachineFunction &MF = DAG.getMachineFunction();
622 const R600InstrInfo *TII =
623 static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
624 interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
625 MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
626 return DAG.getTargetExtractSubreg(
627 TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
628 DL, MVT::f32, SDValue(interp, 0));
629 }
630 MachineFunction &MF = DAG.getMachineFunction();
631 MachineRegisterInfo &MRI = MF.getRegInfo();
632 unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
633 unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
634 MRI.addLiveIn(RegisterI);
635 MRI.addLiveIn(RegisterJ);
636 SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
637 SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
638 SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
639 SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);
640
641 if (slot % 4 < 2)
642 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
643 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
644 RegisterJNode, RegisterINode);
645 else
646 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
647 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
648 RegisterJNode, RegisterINode);
649 return SDValue(interp, slot % 2);
650 }
Vincent Lejeunef143af32013-11-11 22:10:24 +0000651 case AMDGPUIntrinsic::R600_interp_xy:
652 case AMDGPUIntrinsic::R600_interp_zw: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000653 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Tom Stellard41afe6a2013-02-05 17:09:14 +0000654 MachineSDNode *interp;
Vincent Lejeunef143af32013-11-11 22:10:24 +0000655 SDValue RegisterINode = Op.getOperand(2);
656 SDValue RegisterJNode = Op.getOperand(3);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000657
Vincent Lejeunef143af32013-11-11 22:10:24 +0000658 if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
Tom Stellard41afe6a2013-02-05 17:09:14 +0000659 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
Vincent Lejeunef143af32013-11-11 22:10:24 +0000660 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000661 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000662 else
663 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
Vincent Lejeunef143af32013-11-11 22:10:24 +0000664 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000665 RegisterJNode, RegisterINode);
Vincent Lejeunef143af32013-11-11 22:10:24 +0000666 return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
667 SDValue(interp, 0), SDValue(interp, 1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000668 }
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000669 case AMDGPUIntrinsic::R600_tex:
670 case AMDGPUIntrinsic::R600_texc:
671 case AMDGPUIntrinsic::R600_txl:
672 case AMDGPUIntrinsic::R600_txlc:
673 case AMDGPUIntrinsic::R600_txb:
674 case AMDGPUIntrinsic::R600_txbc:
675 case AMDGPUIntrinsic::R600_txf:
676 case AMDGPUIntrinsic::R600_txq:
677 case AMDGPUIntrinsic::R600_ddx:
Vincent Lejeune6df39432013-10-02 16:00:33 +0000678 case AMDGPUIntrinsic::R600_ddy:
679 case AMDGPUIntrinsic::R600_ldptr: {
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000680 unsigned TextureOp;
681 switch (IntrinsicID) {
682 case AMDGPUIntrinsic::R600_tex:
683 TextureOp = 0;
684 break;
685 case AMDGPUIntrinsic::R600_texc:
686 TextureOp = 1;
687 break;
688 case AMDGPUIntrinsic::R600_txl:
689 TextureOp = 2;
690 break;
691 case AMDGPUIntrinsic::R600_txlc:
692 TextureOp = 3;
693 break;
694 case AMDGPUIntrinsic::R600_txb:
695 TextureOp = 4;
696 break;
697 case AMDGPUIntrinsic::R600_txbc:
698 TextureOp = 5;
699 break;
700 case AMDGPUIntrinsic::R600_txf:
701 TextureOp = 6;
702 break;
703 case AMDGPUIntrinsic::R600_txq:
704 TextureOp = 7;
705 break;
706 case AMDGPUIntrinsic::R600_ddx:
707 TextureOp = 8;
708 break;
709 case AMDGPUIntrinsic::R600_ddy:
710 TextureOp = 9;
711 break;
Vincent Lejeune6df39432013-10-02 16:00:33 +0000712 case AMDGPUIntrinsic::R600_ldptr:
713 TextureOp = 10;
714 break;
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000715 default:
716 llvm_unreachable("Unknow Texture Operation");
717 }
718
719 SDValue TexArgs[19] = {
720 DAG.getConstant(TextureOp, MVT::i32),
721 Op.getOperand(1),
722 DAG.getConstant(0, MVT::i32),
723 DAG.getConstant(1, MVT::i32),
724 DAG.getConstant(2, MVT::i32),
725 DAG.getConstant(3, MVT::i32),
726 Op.getOperand(2),
727 Op.getOperand(3),
728 Op.getOperand(4),
729 DAG.getConstant(0, MVT::i32),
730 DAG.getConstant(1, MVT::i32),
731 DAG.getConstant(2, MVT::i32),
732 DAG.getConstant(3, MVT::i32),
733 Op.getOperand(5),
734 Op.getOperand(6),
735 Op.getOperand(7),
736 Op.getOperand(8),
737 Op.getOperand(9),
738 Op.getOperand(10)
739 };
Craig Topper48d114b2014-04-26 18:35:24 +0000740 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000741 }
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000742 case AMDGPUIntrinsic::AMDGPU_dp4: {
743 SDValue Args[8] = {
744 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
745 DAG.getConstant(0, MVT::i32)),
746 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
747 DAG.getConstant(0, MVT::i32)),
748 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
749 DAG.getConstant(1, MVT::i32)),
750 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
751 DAG.getConstant(1, MVT::i32)),
752 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
753 DAG.getConstant(2, MVT::i32)),
754 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
755 DAG.getConstant(2, MVT::i32)),
756 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
757 DAG.getConstant(3, MVT::i32)),
758 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
759 DAG.getConstant(3, MVT::i32))
760 };
Craig Topper48d114b2014-04-26 18:35:24 +0000761 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000762 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000763
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000764 case Intrinsic::r600_read_ngroups_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000765 return LowerImplicitParameter(DAG, VT, DL, 0);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000766 case Intrinsic::r600_read_ngroups_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000767 return LowerImplicitParameter(DAG, VT, DL, 1);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000768 case Intrinsic::r600_read_ngroups_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000769 return LowerImplicitParameter(DAG, VT, DL, 2);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000770 case Intrinsic::r600_read_global_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000771 return LowerImplicitParameter(DAG, VT, DL, 3);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000772 case Intrinsic::r600_read_global_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000773 return LowerImplicitParameter(DAG, VT, DL, 4);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000774 case Intrinsic::r600_read_global_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000775 return LowerImplicitParameter(DAG, VT, DL, 5);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000776 case Intrinsic::r600_read_local_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000777 return LowerImplicitParameter(DAG, VT, DL, 6);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000778 case Intrinsic::r600_read_local_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000779 return LowerImplicitParameter(DAG, VT, DL, 7);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000780 case Intrinsic::r600_read_local_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000781 return LowerImplicitParameter(DAG, VT, DL, 8);
782
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000783 case Intrinsic::r600_read_tgid_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000784 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
785 AMDGPU::T1_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000786 case Intrinsic::r600_read_tgid_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000787 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
788 AMDGPU::T1_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000789 case Intrinsic::r600_read_tgid_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000790 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
791 AMDGPU::T1_Z, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000792 case Intrinsic::r600_read_tidig_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000793 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
794 AMDGPU::T0_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000795 case Intrinsic::r600_read_tidig_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000796 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
797 AMDGPU::T0_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000798 case Intrinsic::r600_read_tidig_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000799 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
800 AMDGPU::T0_Z, VT);
801 }
802 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
803 break;
804 }
805 } // end switch(Op.getOpcode())
806 return SDValue();
807}
808
809void R600TargetLowering::ReplaceNodeResults(SDNode *N,
810 SmallVectorImpl<SDValue> &Results,
811 SelectionDAG &DAG) const {
812 switch (N->getOpcode()) {
Matt Arsenaultd125d742014-03-27 17:23:24 +0000813 default:
814 AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
815 return;
Tom Stellard75aadc22012-12-11 21:25:42 +0000816 case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
Tom Stellard365366f2013-01-23 02:09:06 +0000817 return;
818 case ISD::LOAD: {
819 SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
820 Results.push_back(SDValue(Node, 0));
821 Results.push_back(SDValue(Node, 1));
822 // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
823 // function
824 DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
825 return;
826 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000827 case ISD::STORE:
828 SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
829 Results.push_back(SDValue(Node, 0));
830 return;
Tom Stellard75aadc22012-12-11 21:25:42 +0000831 }
832}
833
Tom Stellard880a80a2014-06-17 16:53:14 +0000834SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
835 SDValue Vector) const {
836
837 SDLoc DL(Vector);
838 EVT VecVT = Vector.getValueType();
839 EVT EltVT = VecVT.getVectorElementType();
840 SmallVector<SDValue, 8> Args;
841
842 for (unsigned i = 0, e = VecVT.getVectorNumElements();
843 i != e; ++i) {
844 Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
845 Vector, DAG.getConstant(i, getVectorIdxTy())));
846 }
847
848 return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
849}
850
851SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
852 SelectionDAG &DAG) const {
853
854 SDLoc DL(Op);
855 SDValue Vector = Op.getOperand(0);
856 SDValue Index = Op.getOperand(1);
857
858 if (isa<ConstantSDNode>(Index) ||
859 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
860 return Op;
861
862 Vector = vectorToVerticalVector(DAG, Vector);
863 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
864 Vector, Index);
865}
866
867SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
868 SelectionDAG &DAG) const {
869 SDLoc DL(Op);
870 SDValue Vector = Op.getOperand(0);
871 SDValue Value = Op.getOperand(1);
872 SDValue Index = Op.getOperand(2);
873
874 if (isa<ConstantSDNode>(Index) ||
875 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
876 return Op;
877
878 Vector = vectorToVerticalVector(DAG, Vector);
879 SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
880 Vector, Value, Index);
881 return vectorToVerticalVector(DAG, Insert);
882}
883
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000884SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
885 // On hw >= R700, COS/SIN input must be between -1. and 1.
886 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
887 EVT VT = Op.getValueType();
888 SDValue Arg = Op.getOperand(0);
889 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
890 DAG.getNode(ISD::FADD, SDLoc(Op), VT,
891 DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
892 DAG.getConstantFP(0.15915494309, MVT::f32)),
893 DAG.getConstantFP(0.5, MVT::f32)));
894 unsigned TrigNode;
895 switch (Op.getOpcode()) {
896 case ISD::FCOS:
897 TrigNode = AMDGPUISD::COS_HW;
898 break;
899 case ISD::FSIN:
900 TrigNode = AMDGPUISD::SIN_HW;
901 break;
902 default:
903 llvm_unreachable("Wrong trig opcode");
904 }
905 SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
906 DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
907 DAG.getConstantFP(-0.5, MVT::f32)));
908 if (Gen >= AMDGPUSubtarget::R700)
909 return TrigVal;
910 // On R600 hw, COS/SIN input must be between -Pi and Pi.
911 return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
912 DAG.getConstantFP(3.14159265359, MVT::f32));
913}
914
Jan Vesely25f36272014-06-18 12:27:13 +0000915SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
916 SDLoc DL(Op);
917 EVT VT = Op.getValueType();
918
919 SDValue Lo = Op.getOperand(0);
920 SDValue Hi = Op.getOperand(1);
921 SDValue Shift = Op.getOperand(2);
922 SDValue Zero = DAG.getConstant(0, VT);
923 SDValue One = DAG.getConstant(1, VT);
924
925 SDValue Width = DAG.getConstant(VT.getSizeInBits(), VT);
926 SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT);
927 SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
928 SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
929
930 // The dance around Width1 is necessary for 0 special case.
931 // Without it the CompShift might be 32, producing incorrect results in
932 // Overflow. So we do the shift in two steps, the alternative is to
933 // add a conditional to filter the special case.
934
935 SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
936 Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);
937
938 SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
939 HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
940 SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);
941
942 SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
943 SDValue LoBig = Zero;
944
945 Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
946 Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
947
948 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
949}
950
Jan Vesely900ff2e2014-06-18 12:27:15 +0000951SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
952 SDLoc DL(Op);
953 EVT VT = Op.getValueType();
954
955 SDValue Lo = Op.getOperand(0);
956 SDValue Hi = Op.getOperand(1);
957 SDValue Shift = Op.getOperand(2);
958 SDValue Zero = DAG.getConstant(0, VT);
959 SDValue One = DAG.getConstant(1, VT);
960
961 SDValue Width = DAG.getConstant(VT.getSizeInBits(), VT);
962 SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT);
963 SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
964 SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
965
966 // The dance around Width1 is necessary for 0 special case.
967 // Without it the CompShift might be 32, producing incorrect results in
968 // Overflow. So we do the shift in two steps, the alternative is to
969 // add a conditional to filter the special case.
970
971 SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
972 Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);
973
974 // TODO: SRA support here
975 SDValue HiSmall = DAG.getNode(ISD::SRL, DL, VT, Hi, Shift);
976 SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
977 LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);
978
979 // TODO: SRA support here
980 SDValue LoBig = DAG.getNode(ISD::SRL, DL, VT, Hi, BigShift);
981 SDValue HiBig = Zero;
982
983 Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
984 Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
985
986 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
987}
988
Tom Stellard75aadc22012-12-11 21:25:42 +0000989SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
990 return DAG.getNode(
991 ISD::SETCC,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000992 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +0000993 MVT::i1,
994 Op, DAG.getConstantFP(0.0f, MVT::f32),
995 DAG.getCondCode(ISD::SETNE)
996 );
997}
998
Tom Stellard75aadc22012-12-11 21:25:42 +0000999SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001000 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001001 unsigned DwordOffset) const {
1002 unsigned ByteOffset = DwordOffset * 4;
1003 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +00001004 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +00001005
1006 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
1007 assert(isInt<16>(ByteOffset));
1008
1009 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
1010 DAG.getConstant(ByteOffset, MVT::i32), // PTR
1011 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
1012 false, false, false, 0);
1013}
1014
Tom Stellard75aadc22012-12-11 21:25:42 +00001015bool R600TargetLowering::isZero(SDValue Op) const {
1016 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
1017 return Cst->isNullValue();
1018 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
1019 return CstFP->isZero();
1020 } else {
1021 return false;
1022 }
1023}
1024
/// Custom-lower SELECT_CC, trying progressively weaker native matches:
/// first a SET* instruction (hardware true/false result values), then a
/// CND* instruction (comparison against zero), and finally a two-step
/// expansion using two SELECT_CC nodes that each match one of the above.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.  If True/False
  // are reversed (False holds the HW-true value), invert the condition —
  // either directly, or combined with an operand swap when only the
  // swapped form of the inverted condition is legal.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
     ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands.
      // Inversion also requires swapping the True/False values.
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    // The "not equal" conditions are rewritten as their inverse with the
    // True/False values swapped, which is equivalent.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.  Cond reduces the comparison to a HW true/false
  // value (a SET* pattern); the outer node is then a CND* pattern.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
1159
Alp Tokercb402912014-01-24 17:20:08 +00001160/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001161/// convert these pointers to a register index. Each register holds
1162/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
1163/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
1164/// for indirect addressing.
1165SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1166 unsigned StackWidth,
1167 SelectionDAG &DAG) const {
1168 unsigned SRLPad;
1169 switch(StackWidth) {
1170 case 1:
1171 SRLPad = 2;
1172 break;
1173 case 2:
1174 SRLPad = 3;
1175 break;
1176 case 4:
1177 SRLPad = 4;
1178 break;
1179 default: llvm_unreachable("Invalid stack width");
1180 }
1181
Andrew Trickef9de2a2013-05-25 02:42:55 +00001182 return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001183 DAG.getConstant(SRLPad, MVT::i32));
1184}
1185
/// Map flat element index \p ElemIdx of an indirectly-addressed value onto
/// the register file.  \p Channel receives the sub-register channel holding
/// the element; \p PtrIncr receives the register-pointer increment relative
/// to the *previous* element (callers accumulate it while walking the
/// elements in order — see the ISD::ADD on Ptr in LowerSTORE/LowerLOAD).
void R600TargetLowering::getStackAddress(unsigned StackWidth,
                                         unsigned ElemIdx,
                                         unsigned &Channel,
                                         unsigned &PtrIncr) const {
  switch (StackWidth) {
  default:
  case 1:
    // One channel per register: every element after the first advances the
    // pointer by one register.
    Channel = 0;
    if (ElemIdx > 0) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 2:
    // Two channels per register: elements 0 and 1 share the first register;
    // element 2 starts the next one.  Element 3 reuses element 2's register,
    // which is why this tests == 2 and not >= 2 (the increment is relative).
    Channel = ElemIdx % 2;
    if (ElemIdx == 2) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 4:
    // Four channels per register: up to four elements fit in one register,
    // so the pointer never advances.
    Channel = ElemIdx;
    PtrIncr = 0;
    break;
  }
}
1214
Tom Stellard75aadc22012-12-11 21:25:42 +00001215SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001216 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001217 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1218 SDValue Chain = Op.getOperand(0);
1219 SDValue Value = Op.getOperand(1);
1220 SDValue Ptr = Op.getOperand(2);
1221
Tom Stellard2ffc3302013-08-26 15:05:44 +00001222 SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
Tom Stellardfbab8272013-08-16 01:12:11 +00001223 if (Result.getNode()) {
1224 return Result;
1225 }
1226
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001227 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
1228 if (StoreNode->isTruncatingStore()) {
1229 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001230 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001231 EVT MemVT = StoreNode->getMemoryVT();
1232 SDValue MaskConstant;
1233 if (MemVT == MVT::i8) {
1234 MaskConstant = DAG.getConstant(0xFF, MVT::i32);
1235 } else {
1236 assert(MemVT == MVT::i16);
1237 MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
1238 }
1239 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
1240 DAG.getConstant(2, MVT::i32));
1241 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
1242 DAG.getConstant(0x00000003, VT));
1243 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1244 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
1245 DAG.getConstant(3, VT));
1246 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1247 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
1248 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1249 // vector instead.
1250 SDValue Src[4] = {
1251 ShiftedValue,
1252 DAG.getConstant(0, MVT::i32),
1253 DAG.getConstant(0, MVT::i32),
1254 Mask
1255 };
Craig Topper48d114b2014-04-26 18:35:24 +00001256 SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001257 SDValue Args[3] = { Chain, Input, DWordAddr };
1258 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
Craig Topper206fcd42014-04-26 19:29:41 +00001259 Op->getVTList(), Args, MemVT,
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001260 StoreNode->getMemOperand());
1261 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
1262 Value.getValueType().bitsGE(MVT::i32)) {
1263 // Convert pointer from byte address to dword address.
1264 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1265 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
1266 Ptr, DAG.getConstant(2, MVT::i32)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001267
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001268 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
Matt Arsenaulteaa3a7e2013-12-10 21:37:42 +00001269 llvm_unreachable("Truncated and indexed stores not supported yet");
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001270 } else {
1271 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1272 }
1273 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001274 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001275 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001276
1277 EVT ValueVT = Value.getValueType();
1278
1279 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1280 return SDValue();
1281 }
1282
Tom Stellarde9373602014-01-22 19:24:14 +00001283 SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
1284 if (Ret.getNode()) {
1285 return Ret;
1286 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001287 // Lowering for indirect addressing
1288
1289 const MachineFunction &MF = DAG.getMachineFunction();
1290 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1291 getTargetMachine().getFrameLowering());
1292 unsigned StackWidth = TFL->getStackWidth(MF);
1293
1294 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1295
1296 if (ValueVT.isVector()) {
1297 unsigned NumElemVT = ValueVT.getVectorNumElements();
1298 EVT ElemVT = ValueVT.getVectorElementType();
Craig Topper48d114b2014-04-26 18:35:24 +00001299 SmallVector<SDValue, 4> Stores(NumElemVT);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001300
1301 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1302 "vector width in load");
1303
1304 for (unsigned i = 0; i < NumElemVT; ++i) {
1305 unsigned Channel, PtrIncr;
1306 getStackAddress(StackWidth, i, Channel, PtrIncr);
1307 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1308 DAG.getConstant(PtrIncr, MVT::i32));
1309 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
1310 Value, DAG.getConstant(i, MVT::i32));
1311
1312 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1313 Chain, Elem, Ptr,
1314 DAG.getTargetConstant(Channel, MVT::i32));
1315 }
Craig Topper48d114b2014-04-26 18:35:24 +00001316 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001317 } else {
1318 if (ValueVT == MVT::i8) {
1319 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1320 }
1321 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
NAKAMURA Takumi18ca09c2013-05-22 06:37:25 +00001322 DAG.getTargetConstant(0, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001323 }
1324
1325 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001326}
1327
// Map a constant-buffer address space to the base of its kc (kernel
// constant) bank: 512 + (kc_bank << 12), i.e. 512 + 4096 * bank.
// Returns -1 for any address space that is not a constant buffer.
static int
ConstantAddressBlock(unsigned AddressSpace) {
  switch (AddressSpace) {
  case AMDGPUAS::CONSTANT_BUFFER_0:
    return 512;
  case AMDGPUAS::CONSTANT_BUFFER_1:
    return 512 + 4096;
  case AMDGPUAS::CONSTANT_BUFFER_2:
    return 512 + 4096 * 2;
  case AMDGPUAS::CONSTANT_BUFFER_3:
    return 512 + 4096 * 3;
  case AMDGPUAS::CONSTANT_BUFFER_4:
    return 512 + 4096 * 4;
  case AMDGPUAS::CONSTANT_BUFFER_5:
    return 512 + 4096 * 5;
  case AMDGPUAS::CONSTANT_BUFFER_6:
    return 512 + 4096 * 6;
  case AMDGPUAS::CONSTANT_BUFFER_7:
    return 512 + 4096 * 7;
  case AMDGPUAS::CONSTANT_BUFFER_8:
    return 512 + 4096 * 8;
  case AMDGPUAS::CONSTANT_BUFFER_9:
    return 512 + 4096 * 9;
  case AMDGPUAS::CONSTANT_BUFFER_10:
    return 512 + 4096 * 10;
  case AMDGPUAS::CONSTANT_BUFFER_11:
    return 512 + 4096 * 11;
  case AMDGPUAS::CONSTANT_BUFFER_12:
    return 512 + 4096 * 12;
  case AMDGPUAS::CONSTANT_BUFFER_13:
    return 512 + 4096 * 13;
  case AMDGPUAS::CONSTANT_BUFFER_14:
    return 512 + 4096 * 14;
  case AMDGPUAS::CONSTANT_BUFFER_15:
    return 512 + 4096 * 15;
  default:
    return -1;
  }
}
1368
1369SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
1370{
1371 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001372 SDLoc DL(Op);
Tom Stellard365366f2013-01-23 02:09:06 +00001373 LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
1374 SDValue Chain = Op.getOperand(0);
1375 SDValue Ptr = Op.getOperand(1);
1376 SDValue LoweredLoad;
1377
Tom Stellarde9373602014-01-22 19:24:14 +00001378 SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
1379 if (Ret.getNode()) {
Matt Arsenault7939acd2014-04-07 16:44:24 +00001380 SDValue Ops[2] = {
1381 Ret,
1382 Chain
1383 };
Craig Topper64941d92014-04-27 19:20:57 +00001384 return DAG.getMergeValues(Ops, DL);
Tom Stellarde9373602014-01-22 19:24:14 +00001385 }
1386
1387
Tom Stellard35bb18c2013-08-26 15:06:04 +00001388 if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
1389 SDValue MergedValues[2] = {
1390 SplitVectorLoad(Op, DAG),
1391 Chain
1392 };
Craig Topper64941d92014-04-27 19:20:57 +00001393 return DAG.getMergeValues(MergedValues, DL);
Tom Stellard35bb18c2013-08-26 15:06:04 +00001394 }
1395
Tom Stellard365366f2013-01-23 02:09:06 +00001396 int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
Matt Arsenault00a0d6f2013-11-13 02:39:07 +00001397 if (ConstantBlock > -1 &&
1398 ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
1399 (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
Tom Stellard365366f2013-01-23 02:09:06 +00001400 SDValue Result;
Nick Lewyckyaad475b2014-04-15 07:22:52 +00001401 if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
1402 isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
Matt Arsenaultef1a9502013-11-01 17:39:26 +00001403 isa<ConstantSDNode>(Ptr)) {
Tom Stellard365366f2013-01-23 02:09:06 +00001404 SDValue Slots[4];
1405 for (unsigned i = 0; i < 4; i++) {
1406 // We want Const position encoded with the following formula :
1407 // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
1408 // const_index is Ptr computed by llvm using an alignment of 16.
1409 // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
1410 // then div by 4 at the ISel step
1411 SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
1412 DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
1413 Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
1414 }
Tom Stellard0344cdf2013-08-01 15:23:42 +00001415 EVT NewVT = MVT::v4i32;
1416 unsigned NumElements = 4;
1417 if (VT.isVector()) {
1418 NewVT = VT;
1419 NumElements = VT.getVectorNumElements();
1420 }
Craig Topper48d114b2014-04-26 18:35:24 +00001421 Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT,
Craig Topper2d2aa0c2014-04-30 07:17:30 +00001422 makeArrayRef(Slots, NumElements));
Tom Stellard365366f2013-01-23 02:09:06 +00001423 } else {
Alp Tokerf907b892013-12-05 05:44:44 +00001424 // non-constant ptr can't be folded, keeps it as a v4f32 load
Tom Stellard365366f2013-01-23 02:09:06 +00001425 Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
Vincent Lejeune743dca02013-03-05 15:04:29 +00001426 DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
Christian Konig189357c2013-03-07 09:03:59 +00001427 DAG.getConstant(LoadNode->getAddressSpace() -
NAKAMURA Takumi18ca09c2013-05-22 06:37:25 +00001428 AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
Tom Stellard365366f2013-01-23 02:09:06 +00001429 );
1430 }
1431
1432 if (!VT.isVector()) {
1433 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
1434 DAG.getConstant(0, MVT::i32));
1435 }
1436
1437 SDValue MergedValues[2] = {
Matt Arsenault7939acd2014-04-07 16:44:24 +00001438 Result,
1439 Chain
Tom Stellard365366f2013-01-23 02:09:06 +00001440 };
Craig Topper64941d92014-04-27 19:20:57 +00001441 return DAG.getMergeValues(MergedValues, DL);
Tom Stellard365366f2013-01-23 02:09:06 +00001442 }
1443
Matt Arsenault909d0c02013-10-30 23:43:29 +00001444 // For most operations returning SDValue() will result in the node being
1445 // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
1446 // need to manually expand loads that may be legal in some address spaces and
1447 // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
1448 // compute shaders, since the data is sign extended when it is uploaded to the
1449 // buffer. However SEXT loads from other address spaces are not supported, so
1450 // we need to expand them here.
Tom Stellard84021442013-07-23 01:48:24 +00001451 if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
1452 EVT MemVT = LoadNode->getMemoryVT();
1453 assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
1454 SDValue ShiftAmount =
1455 DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
1456 SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
1457 LoadNode->getPointerInfo(), MemVT,
1458 LoadNode->isVolatile(),
1459 LoadNode->isNonTemporal(),
1460 LoadNode->getAlignment());
1461 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
1462 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);
1463
1464 SDValue MergedValues[2] = { Sra, Chain };
Craig Topper64941d92014-04-27 19:20:57 +00001465 return DAG.getMergeValues(MergedValues, DL);
Tom Stellard84021442013-07-23 01:48:24 +00001466 }
1467
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001468 if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1469 return SDValue();
1470 }
1471
1472 // Lowering for indirect addressing
1473 const MachineFunction &MF = DAG.getMachineFunction();
1474 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1475 getTargetMachine().getFrameLowering());
1476 unsigned StackWidth = TFL->getStackWidth(MF);
1477
1478 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1479
1480 if (VT.isVector()) {
1481 unsigned NumElemVT = VT.getVectorNumElements();
1482 EVT ElemVT = VT.getVectorElementType();
1483 SDValue Loads[4];
1484
1485 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1486 "vector width in load");
1487
1488 for (unsigned i = 0; i < NumElemVT; ++i) {
1489 unsigned Channel, PtrIncr;
1490 getStackAddress(StackWidth, i, Channel, PtrIncr);
1491 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1492 DAG.getConstant(PtrIncr, MVT::i32));
1493 Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
1494 Chain, Ptr,
1495 DAG.getTargetConstant(Channel, MVT::i32),
1496 Op.getOperand(2));
1497 }
1498 for (unsigned i = NumElemVT; i < 4; ++i) {
1499 Loads[i] = DAG.getUNDEF(ElemVT);
1500 }
1501 EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
Craig Topper48d114b2014-04-26 18:35:24 +00001502 LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001503 } else {
1504 LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
1505 Chain, Ptr,
1506 DAG.getTargetConstant(0, MVT::i32), // Channel
1507 Op.getOperand(2));
1508 }
1509
Matt Arsenault7939acd2014-04-07 16:44:24 +00001510 SDValue Ops[2] = {
1511 LoweredLoad,
1512 Chain
1513 };
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001514
Craig Topper64941d92014-04-27 19:20:57 +00001515 return DAG.getMergeValues(Ops, DL);
Tom Stellard365366f2013-01-23 02:09:06 +00001516}
Tom Stellard75aadc22012-12-11 21:25:42 +00001517
Tom Stellard75aadc22012-12-11 21:25:42 +00001518/// XXX Only kernel functions are supported, so we can assume for now that
1519/// every function is a kernel function, but in the future we should use
1520/// separate calling conventions for kernel and non-kernel functions.
1521SDValue R600TargetLowering::LowerFormalArguments(
1522 SDValue Chain,
1523 CallingConv::ID CallConv,
1524 bool isVarArg,
1525 const SmallVectorImpl<ISD::InputArg> &Ins,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001526 SDLoc DL, SelectionDAG &DAG,
Tom Stellard75aadc22012-12-11 21:25:42 +00001527 SmallVectorImpl<SDValue> &InVals) const {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001528 SmallVector<CCValAssign, 16> ArgLocs;
1529 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
1530 getTargetMachine(), ArgLocs, *DAG.getContext());
Vincent Lejeunef143af32013-11-11 22:10:24 +00001531 MachineFunction &MF = DAG.getMachineFunction();
1532 unsigned ShaderType = MF.getInfo<R600MachineFunctionInfo>()->ShaderType;
Tom Stellardacfeebf2013-07-23 01:48:05 +00001533
Tom Stellardaf775432013-10-23 00:44:32 +00001534 SmallVector<ISD::InputArg, 8> LocalIns;
1535
Matt Arsenault209a7b92014-04-18 07:40:20 +00001536 getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);
Tom Stellardaf775432013-10-23 00:44:32 +00001537
1538 AnalyzeFormalArguments(CCInfo, LocalIns);
Tom Stellardacfeebf2013-07-23 01:48:05 +00001539
Tom Stellard1e803092013-07-23 01:48:18 +00001540 for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001541 CCValAssign &VA = ArgLocs[i];
Tom Stellardaf775432013-10-23 00:44:32 +00001542 EVT VT = Ins[i].VT;
1543 EVT MemVT = LocalIns[i].VT;
Tom Stellard78e01292013-07-23 01:47:58 +00001544
Vincent Lejeunef143af32013-11-11 22:10:24 +00001545 if (ShaderType != ShaderType::COMPUTE) {
1546 unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
1547 SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
1548 InVals.push_back(Register);
1549 continue;
1550 }
1551
Tom Stellard75aadc22012-12-11 21:25:42 +00001552 PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +00001553 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellardacfeebf2013-07-23 01:48:05 +00001554
Matt Arsenaultfae02982014-03-17 18:58:11 +00001555 // i64 isn't a legal type, so the register type used ends up as i32, which
1556 // isn't expected here. It attempts to create this sextload, but it ends up
1557 // being invalid. Somehow this seems to work with i64 arguments, but breaks
1558 // for <1 x i64>.
1559
Tom Stellardacfeebf2013-07-23 01:48:05 +00001560 // The first 36 bytes of the input buffer contains information about
1561 // thread group and global sizes.
Matt Arsenaulte1f030c2014-04-11 20:59:54 +00001562
1563 // FIXME: This should really check the extload type, but the handling of
1564 // extload vecto parameters seems to be broken.
1565 //ISD::LoadExtType Ext = Ins[i].Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
1566 ISD::LoadExtType Ext = ISD::SEXTLOAD;
1567 SDValue Arg = DAG.getExtLoad(Ext, DL, VT, Chain,
Tom Stellardaf775432013-10-23 00:44:32 +00001568 DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
1569 MachinePointerInfo(UndefValue::get(PtrTy)),
1570 MemVT, false, false, 4);
Matt Arsenault209a7b92014-04-18 07:40:20 +00001571
1572 // 4 is the preferred alignment for the CONSTANT memory space.
Tom Stellard75aadc22012-12-11 21:25:42 +00001573 InVals.push_back(Arg);
Tom Stellard75aadc22012-12-11 21:25:42 +00001574 }
1575 return Chain;
1576}
1577
Matt Arsenault758659232013-05-18 00:21:46 +00001578EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001579 if (!VT.isVector())
1580 return MVT::i32;
Tom Stellard75aadc22012-12-11 21:25:42 +00001581 return VT.changeVectorElementTypeToInteger();
1582}
1583
Matt Arsenault209a7b92014-04-18 07:40:20 +00001584static SDValue CompactSwizzlableVector(
1585 SelectionDAG &DAG, SDValue VectorEntry,
1586 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001587 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1588 assert(RemapSwizzle.empty());
1589 SDValue NewBldVec[4] = {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001590 VectorEntry.getOperand(0),
1591 VectorEntry.getOperand(1),
1592 VectorEntry.getOperand(2),
1593 VectorEntry.getOperand(3)
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001594 };
1595
1596 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeunefa58a5f2013-10-13 17:56:10 +00001597 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1598 // We mask write here to teach later passes that the ith element of this
1599 // vector is undef. Thus we can use it to reduce 128 bits reg usage,
1600 // break false dependencies and additionnaly make assembly easier to read.
1601 RemapSwizzle[i] = 7; // SEL_MASK_WRITE
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001602 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1603 if (C->isZero()) {
1604 RemapSwizzle[i] = 4; // SEL_0
1605 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1606 } else if (C->isExactlyValue(1.0)) {
1607 RemapSwizzle[i] = 5; // SEL_1
1608 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1609 }
1610 }
1611
1612 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1613 continue;
1614 for (unsigned j = 0; j < i; j++) {
1615 if (NewBldVec[i] == NewBldVec[j]) {
1616 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1617 RemapSwizzle[i] = j;
1618 break;
1619 }
1620 }
1621 }
1622
1623 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001624 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001625}
1626
Benjamin Kramer193960c2013-06-11 13:32:25 +00001627static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1628 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001629 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1630 assert(RemapSwizzle.empty());
1631 SDValue NewBldVec[4] = {
1632 VectorEntry.getOperand(0),
1633 VectorEntry.getOperand(1),
1634 VectorEntry.getOperand(2),
1635 VectorEntry.getOperand(3)
1636 };
1637 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001638 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001639 RemapSwizzle[i] = i;
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001640 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1641 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1642 ->getZExtValue();
1643 if (i == Idx)
1644 isUnmovable[Idx] = true;
1645 }
1646 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001647
1648 for (unsigned i = 0; i < 4; i++) {
1649 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1650 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1651 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001652 if (isUnmovable[Idx])
1653 continue;
1654 // Swap i and Idx
1655 std::swap(NewBldVec[Idx], NewBldVec[i]);
1656 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1657 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001658 }
1659 }
1660
1661 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001662 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001663}
1664
1665
1666SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1667SDValue Swz[4], SelectionDAG &DAG) const {
1668 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1669 // Old -> New swizzle values
1670 DenseMap<unsigned, unsigned> SwizzleRemap;
1671
1672 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1673 for (unsigned i = 0; i < 4; i++) {
1674 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1675 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1676 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1677 }
1678
1679 SwizzleRemap.clear();
1680 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1681 for (unsigned i = 0; i < 4; i++) {
1682 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1683 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1684 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1685 }
1686
1687 return BuildVector;
1688}
1689
1690
Tom Stellard75aadc22012-12-11 21:25:42 +00001691//===----------------------------------------------------------------------===//
1692// Custom DAG Optimizations
1693//===----------------------------------------------------------------------===//
1694
/// Target-specific DAG combines for R600. Nodes that are not handled here are
/// forwarded to the generic AMDGPU combines.
SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
  case ISD::FP_ROUND: {
      SDValue Arg = N->getOperand(0);
      if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
        return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
                           Arg.getOperand(0));
      }
      break;
    }

  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
  // (i32 select_cc f32, f32, -1, 0 cc)
  //
  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
  // this to one of the SET*_DX10 instructions.
  case ISD::FP_TO_SINT: {
    SDValue FNeg = N->getOperand(0);
    if (FNeg.getOpcode() != ISD::FNEG) {
      return SDValue();
    }
    SDValue SelectCC = FNeg.getOperand(0);
    // Only handle f32 compares whose true/false values are the hardware
    // 1.0/0.0 constants; those map directly onto SET*_DX10.
    if (SelectCC.getOpcode() != ISD::SELECT_CC ||
        SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
        SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
        !isHWTrueValue(SelectCC.getOperand(2)) ||
        !isHWFalseValue(SelectCC.getOperand(3))) {
      return SDValue();
    }

    return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
                       SelectCC.getOperand(0), // LHS
                       SelectCC.getOperand(1), // RHS
                       DAG.getConstant(-1, MVT::i32), // True
                       DAG.getConstant(0, MVT::i32), // False
                       SelectCC.getOperand(4)); // CC

    break; // Unreachable: kept for symmetry with the other cases.
  }

  // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
  // => build_vector elt0, ... , NewEltIdx, ... , eltN
  case ISD::INSERT_VECTOR_ELT: {
    SDValue InVec = N->getOperand(0);
    SDValue InVal = N->getOperand(1);
    SDValue EltNo = N->getOperand(2);
    SDLoc dl(N);

    // If the inserted element is an UNDEF, just use the input vector.
    if (InVal.getOpcode() == ISD::UNDEF)
      return InVec;

    EVT VT = InVec.getValueType();

    // If we can't generate a legal BUILD_VECTOR, exit
    if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
      return SDValue();

    // Check that we know which element is being inserted
    if (!isa<ConstantSDNode>(EltNo))
      return SDValue();
    unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

    // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
    // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
    // vector elements.
    SmallVector<SDValue, 8> Ops;
    if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
      Ops.append(InVec.getNode()->op_begin(),
                 InVec.getNode()->op_end());
    } else if (InVec.getOpcode() == ISD::UNDEF) {
      unsigned NElts = VT.getVectorNumElements();
      Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
    } else {
      return SDValue();
    }

    // Insert the element
    if (Elt < Ops.size()) {
      // All the operands of BUILD_VECTOR must have the same type;
      // we enforce that here.
      EVT OpVT = Ops[0].getValueType();
      if (InVal.getValueType() != OpVT)
        InVal = OpVT.bitsGT(InVal.getValueType()) ?
          DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
          DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
      Ops[Elt] = InVal;
    }

    // Return the new vector
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
  }

  // Extract_vec (Build_vector) generated by custom lowering
  // also needs to be customly combined
  case ISD::EXTRACT_VECTOR_ELT: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return Arg->getOperand(Element);
      }
    }
    if (Arg.getOpcode() == ISD::BITCAST &&
        Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
            Arg->getOperand(0).getOperand(Element));
      }
    }
  }
  // NOTE(review): no break above — an unmatched EXTRACT_VECTOR_ELT falls
  // through into the SELECT_CC case (which starts by retrying the generic
  // combine, then bails out on the opcode checks). Looks intentional but
  // fragile; confirm before adding a break.

  case ISD::SELECT_CC: {
    // Try common optimizations
    SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
    if (Ret.getNode())
      return Ret;

    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
    // selectcc x, y, a, b, inv(cc)
    //
    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
    // selectcc x, y, a, b, cc
    SDValue LHS = N->getOperand(0);
    if (LHS.getOpcode() != ISD::SELECT_CC) {
      return SDValue();
    }

    SDValue RHS = N->getOperand(1);
    SDValue True = N->getOperand(2);
    SDValue False = N->getOperand(3);
    ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();

    // The folds only apply when the outer select's true/false values match
    // the inner one's and the outer RHS is the false value.
    if (LHS.getOperand(2).getNode() != True.getNode() ||
        LHS.getOperand(3).getNode() != False.getNode() ||
        RHS.getNode() != False.getNode()) {
      return SDValue();
    }

    switch (NCC) {
    default: return SDValue();
    case ISD::SETNE: return LHS;
    case ISD::SETEQ: {
      ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
      LHSCC = ISD::getSetCCInverse(LHSCC,
                                  LHS.getOperand(0).getValueType().isInteger());
      // Only rewrite if the inverted condition is (or will become) legal.
      if (DCI.isBeforeLegalizeOps() ||
          isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
        return DAG.getSelectCC(SDLoc(N),
                               LHS.getOperand(0),
                               LHS.getOperand(1),
                               LHS.getOperand(2),
                               LHS.getOperand(3),
                               LHSCC);
      break;
    }
    }
    return SDValue();
  }

  case AMDGPUISD::EXPORT: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    // Re-emit the export with an optimized value vector and swizzle
    // selectors (operands 4-7).
    SDValue NewArgs[8] = {
      N->getOperand(0), // Chain
      SDValue(),
      N->getOperand(2), // ArrayBase
      N->getOperand(3), // Type
      N->getOperand(4), // SWZ_X
      N->getOperand(5), // SWZ_Y
      N->getOperand(6), // SWZ_Z
      N->getOperand(7) // SWZ_W
    };
    SDLoc DL(N);
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
    return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
  }
  case AMDGPUISD::TEXTURE_FETCH: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    // Same idea as EXPORT: optimize the coordinate vector and the swizzle
    // selectors (operands 2-5), keeping the remaining operands unchanged.
    SDValue NewArgs[19] = {
      N->getOperand(0),
      N->getOperand(1),
      N->getOperand(2),
      N->getOperand(3),
      N->getOperand(4),
      N->getOperand(5),
      N->getOperand(6),
      N->getOperand(7),
      N->getOperand(8),
      N->getOperand(9),
      N->getOperand(10),
      N->getOperand(11),
      N->getOperand(12),
      N->getOperand(13),
      N->getOperand(14),
      N->getOperand(15),
      N->getOperand(16),
      N->getOperand(17),
      N->getOperand(18),
    };
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
    return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
        NewArgs);
  }
  }

  return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001914
1915static bool
1916FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001917 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001918 const R600InstrInfo *TII =
1919 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
1920 if (!Src.isMachineOpcode())
1921 return false;
1922 switch (Src.getMachineOpcode()) {
1923 case AMDGPU::FNEG_R600:
1924 if (!Neg.getNode())
1925 return false;
1926 Src = Src.getOperand(0);
1927 Neg = DAG.getTargetConstant(1, MVT::i32);
1928 return true;
1929 case AMDGPU::FABS_R600:
1930 if (!Abs.getNode())
1931 return false;
1932 Src = Src.getOperand(0);
1933 Abs = DAG.getTargetConstant(1, MVT::i32);
1934 return true;
1935 case AMDGPU::CONST_COPY: {
1936 unsigned Opcode = ParentNode->getMachineOpcode();
1937 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1938
1939 if (!Sel.getNode())
1940 return false;
1941
1942 SDValue CstOffset = Src.getOperand(0);
1943 if (ParentNode->getValueType(0).isVector())
1944 return false;
1945
1946 // Gather constants values
1947 int SrcIndices[] = {
1948 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
1949 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
1950 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
1951 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1952 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1953 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1954 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
1955 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
1956 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
1957 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
1958 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
1959 };
1960 std::vector<unsigned> Consts;
Matt Arsenault4d64f962014-05-12 19:23:21 +00001961 for (int OtherSrcIdx : SrcIndices) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001962 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
1963 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
1964 continue;
1965 if (HasDst) {
1966 OtherSrcIdx--;
1967 OtherSelIdx--;
1968 }
1969 if (RegisterSDNode *Reg =
1970 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
1971 if (Reg->getReg() == AMDGPU::ALU_CONST) {
Matt Arsenaultb3ee3882014-05-12 19:26:38 +00001972 ConstantSDNode *Cst
1973 = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001974 Consts.push_back(Cst->getZExtValue());
1975 }
1976 }
1977 }
1978
Matt Arsenault37c12d72014-05-12 20:42:57 +00001979 ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001980 Consts.push_back(Cst->getZExtValue());
1981 if (!TII->fitsConstReadLimitations(Consts)) {
1982 return false;
1983 }
1984
1985 Sel = CstOffset;
1986 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
1987 return true;
1988 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001989 case AMDGPU::MOV_IMM_I32:
1990 case AMDGPU::MOV_IMM_F32: {
1991 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
1992 uint64_t ImmValue = 0;
1993
1994
1995 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
1996 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
1997 float FloatValue = FPC->getValueAPF().convertToFloat();
1998 if (FloatValue == 0.0) {
1999 ImmReg = AMDGPU::ZERO;
2000 } else if (FloatValue == 0.5) {
2001 ImmReg = AMDGPU::HALF;
2002 } else if (FloatValue == 1.0) {
2003 ImmReg = AMDGPU::ONE;
2004 } else {
2005 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2006 }
2007 } else {
2008 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
2009 uint64_t Value = C->getZExtValue();
2010 if (Value == 0) {
2011 ImmReg = AMDGPU::ZERO;
2012 } else if (Value == 1) {
2013 ImmReg = AMDGPU::ONE_INT;
2014 } else {
2015 ImmValue = Value;
2016 }
2017 }
2018
2019 // Check that we aren't already using an immediate.
2020 // XXX: It's possible for an instruction to have more than one
2021 // immediate operand, but this is not supported yet.
2022 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
2023 if (!Imm.getNode())
2024 return false;
2025 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
2026 assert(C);
2027 if (C->getZExtValue())
2028 return false;
2029 Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
2030 }
2031 Src = DAG.getRegister(ImmReg, MVT::i32);
2032 return true;
2033 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002034 default:
2035 return false;
2036 }
2037}
2038
2039
/// \brief Fold the instructions after selecting them
///
/// Walks the operands of \p Node and uses FoldOperand() to fold source
/// modifiers, constant reads and immediates. Three shapes are handled:
/// DOT_4 (8 vector sources), REG_SEQUENCE (every odd operand is a value),
/// and plain ALU instructions with up to 3 sources; CLAMP_R600 is folded
/// into its operand's clamp bit instead.
SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
                                            SelectionDAG &DAG) const {
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
  if (!Node->isMachineOpcode())
    return Node;
  unsigned Opcode = Node->getMachineOpcode();
  SDValue FakeOp;

  // Mutable copy of the node's operands; FoldOperand() edits entries through
  // the references taken below, and we re-create the node on any change.
  std::vector<SDValue> Ops;
  for(SDNode::op_iterator I = Node->op_begin(), E = Node->op_end();
      I != E; ++I)
    Ops.push_back(*I);

  if (Opcode == AMDGPU::DOT_4) {
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
    };
    for (unsigned i = 0; i < 8; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      // The "- 1" converts MachineInstr operand indices (which include the
      // dst register) to SDNode operand indices (which do not).
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue &Abs = Ops[AbsIdx[i] - 1];
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      if (HasDst)
        SelIdx--;
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::REG_SEQUENCE) {
    // REG_SEQUENCE operands alternate (value, subreg-index) starting at 1.
    for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
      SDValue &Src = Ops[i];
      if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::CLAMP_R600) {
    // Fold CLAMP into the defining instruction's clamp bit when that
    // instruction supports modifiers.
    SDValue Src = Node->getOperand(0);
    if (!Src.isMachineOpcode() ||
        !TII->hasInstrModifiers(Src.getMachineOpcode()))
      return Node;
    int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
        AMDGPU::OpName::clamp);
    if (ClampIdx < 0)
      return Node;
    // NOTE(review): this local Ops intentionally shadows the outer Ops — it
    // holds the *source* instruction's operands, not this node's.
    std::vector<SDValue> Ops;
    unsigned NumOp = Src.getNumOperands();
    for(unsigned i = 0; i < NumOp; ++i)
      Ops.push_back(Src.getOperand(i));
    Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
    return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
        Node->getVTList(), Ops);
  } else {
    if (!TII->hasInstrModifiers(Opcode))
      return Node;
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
      -1  // src2 never has an abs modifier.
    };
    for (unsigned i = 0; i < 3; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue FakeAbs;
      SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
      if (HasDst) {
        SelIdx--;
        ImmIdx--;
      }
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      SDValue &Imm = Ops[ImmIdx];
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  }

  return Node;
}