blob: f0e13e56d8ffd038e9a3f5699c56892d0bc55886 [file] [log] [blame]
Tom Stellard75aadc22012-12-11 21:25:42 +00001//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Custom DAG lowering for R600
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000016#include "AMDILIntrinsicInfo.h"
17#include "AMDGPUFrameLowering.h"
18#include "AMDGPUSubtarget.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000019#include "R600Defines.h"
20#include "R600InstrInfo.h"
21#include "R600MachineFunctionInfo.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000022#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000023#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000024#include "llvm/CodeGen/MachineInstrBuilder.h"
25#include "llvm/CodeGen/MachineRegisterInfo.h"
26#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000027#include "llvm/IR/Argument.h"
28#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000029
30using namespace llvm;
31
32R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000033 AMDGPUTargetLowering(TM),
34 Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000035 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
36 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
37 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
38 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000039 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
40 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
41
Tom Stellard75aadc22012-12-11 21:25:42 +000042 computeRegisterProperties();
43
Tom Stellard0351ea22013-09-28 02:50:50 +000044 // Set condition code actions
45 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
46 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000047 setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000048 setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000049 setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
50 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000051 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
52 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
53 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
54 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000055 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
56 setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
57
58 setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
59 setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
60 setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
61 setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
62
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000063 setOperationAction(ISD::FCOS, MVT::f32, Custom);
64 setOperationAction(ISD::FSIN, MVT::f32, Custom);
65
Tom Stellard75aadc22012-12-11 21:25:42 +000066 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000067 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000068
Tom Stellard492ebea2013-03-08 15:37:07 +000069 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
70 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000071
72 setOperationAction(ISD::FSUB, MVT::f32, Expand);
73
74 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
75 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
76 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000077
Tom Stellard75aadc22012-12-11 21:25:42 +000078 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
79 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
80
Tom Stellarde8f9f282013-03-08 15:37:05 +000081 setOperationAction(ISD::SETCC, MVT::i32, Expand);
82 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000083 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
84
Tom Stellard53f2f902013-09-05 18:38:03 +000085 setOperationAction(ISD::SELECT, MVT::i32, Expand);
86 setOperationAction(ISD::SELECT, MVT::f32, Expand);
87 setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
Tom Stellard53f2f902013-09-05 18:38:03 +000088 setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000089
Matt Arsenault4e466652014-04-16 01:41:30 +000090 // Expand sign extension of vectors
91 if (!Subtarget->hasBFE())
92 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
93
94 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
95 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);
96
97 if (!Subtarget->hasBFE())
98 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
99 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
100 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);
101
102 if (!Subtarget->hasBFE())
103 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
104 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
105 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);
106
107 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
108 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
109 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);
110
111 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
112
113
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000114 // Legalize loads and stores to the private address space.
115 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000116 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000117 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000118
119 // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
120 // spaces, so it is custom lowered to handle those where it isn't.
Tom Stellard1e803092013-07-23 01:48:18 +0000121 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
122 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
123 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
124 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000125 setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
126 setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
127
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000128 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000129 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000130 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000131 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
Tom Stellardd3ee8c12013-08-16 01:12:06 +0000132 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
133 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000134
Tom Stellard365366f2013-01-23 02:09:06 +0000135 setOperationAction(ISD::LOAD, MVT::i32, Custom);
136 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000137 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
138
Tom Stellard880a80a2014-06-17 16:53:14 +0000139 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
140 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
141 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
142 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
143
144 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
145 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
146 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
147 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
148
Tom Stellard75aadc22012-12-11 21:25:42 +0000149 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +0000150 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +0000151 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +0000152 setTargetDAGCombine(ISD::SELECT_CC);
Quentin Colombete2e05482013-07-30 00:27:16 +0000153 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000154
Tom Stellard5f337882014-04-29 23:12:43 +0000155 // These should be replaced by UDVIREM, but it does not happen automatically
156 // during Type Legalization
157 setOperationAction(ISD::UDIV, MVT::i64, Custom);
158 setOperationAction(ISD::UREM, MVT::i64, Custom);
159
Jan Vesely25f36272014-06-18 12:27:13 +0000160 // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
161 // to be Legal/Custom in order to avoid library calls.
162 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
Jan Vesely900ff2e2014-06-18 12:27:15 +0000163 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
Jan Veselyecf51332014-06-18 12:27:17 +0000164 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
Jan Vesely25f36272014-06-18 12:27:13 +0000165
Michel Danzer49812b52013-07-10 16:37:07 +0000166 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
167
Tom Stellardb852af52013-03-08 15:37:03 +0000168 setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard87047f62013-04-24 23:56:18 +0000169 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
Tom Stellardfc455472013-08-12 22:33:21 +0000170 setSchedulingPreference(Sched::Source);
Tom Stellard75aadc22012-12-11 21:25:42 +0000171}
172
173MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
174 MachineInstr * MI, MachineBasicBlock * BB) const {
175 MachineFunction * MF = BB->getParent();
176 MachineRegisterInfo &MRI = MF->getRegInfo();
177 MachineBasicBlock::iterator I = *MI;
Bill Wendling37e9adb2013-06-07 20:28:55 +0000178 const R600InstrInfo *TII =
179 static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());
Tom Stellard75aadc22012-12-11 21:25:42 +0000180
181 switch (MI->getOpcode()) {
Tom Stellardc6f4a292013-08-26 15:05:59 +0000182 default:
Tom Stellard8f9fc202013-11-15 00:12:45 +0000183 // Replace LDS_*_RET instruction that don't have any uses with the
184 // equivalent LDS_*_NORET instruction.
185 if (TII->isLDSRetInstr(MI->getOpcode())) {
Tom Stellard13c68ef2013-09-05 18:38:09 +0000186 int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
187 assert(DstIdx != -1);
188 MachineInstrBuilder NewMI;
Tom Stellard8f9fc202013-11-15 00:12:45 +0000189 if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()))
190 return BB;
191
192 NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
193 TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
Tom Stellardc6f4a292013-08-26 15:05:59 +0000194 for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
195 NewMI.addOperand(MI->getOperand(i));
196 }
Tom Stellardc6f4a292013-08-26 15:05:59 +0000197 } else {
198 return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
199 }
200 break;
Tom Stellard75aadc22012-12-11 21:25:42 +0000201 case AMDGPU::CLAMP_R600: {
202 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
203 AMDGPU::MOV,
204 MI->getOperand(0).getReg(),
205 MI->getOperand(1).getReg());
206 TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
207 break;
208 }
209
210 case AMDGPU::FABS_R600: {
211 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
212 AMDGPU::MOV,
213 MI->getOperand(0).getReg(),
214 MI->getOperand(1).getReg());
215 TII->addFlag(NewMI, 0, MO_FLAG_ABS);
216 break;
217 }
218
219 case AMDGPU::FNEG_R600: {
220 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
221 AMDGPU::MOV,
222 MI->getOperand(0).getReg(),
223 MI->getOperand(1).getReg());
224 TII->addFlag(NewMI, 0, MO_FLAG_NEG);
225 break;
226 }
227
Tom Stellard75aadc22012-12-11 21:25:42 +0000228 case AMDGPU::MASK_WRITE: {
229 unsigned maskedRegister = MI->getOperand(0).getReg();
230 assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
231 MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
232 TII->addFlag(defInstr, 0, MO_FLAG_MASK);
233 break;
234 }
235
236 case AMDGPU::MOV_IMM_F32:
237 TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
238 MI->getOperand(1).getFPImm()->getValueAPF()
239 .bitcastToAPInt().getZExtValue());
240 break;
241 case AMDGPU::MOV_IMM_I32:
242 TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
243 MI->getOperand(1).getImm());
244 break;
Vincent Lejeune0b72f102013-03-05 15:04:55 +0000245 case AMDGPU::CONST_COPY: {
246 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
247 MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
Tom Stellard02661d92013-06-25 21:22:18 +0000248 TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
Vincent Lejeune0b72f102013-03-05 15:04:55 +0000249 MI->getOperand(1).getImm());
250 break;
251 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000252
253 case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
Tom Stellard0344cdf2013-08-01 15:23:42 +0000254 case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
Tom Stellard75aadc22012-12-11 21:25:42 +0000255 case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +0000256 unsigned EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
Tom Stellard75aadc22012-12-11 21:25:42 +0000257
258 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
259 .addOperand(MI->getOperand(0))
260 .addOperand(MI->getOperand(1))
261 .addImm(EOP); // Set End of program bit
262 break;
263 }
264
Tom Stellard75aadc22012-12-11 21:25:42 +0000265 case AMDGPU::TXD: {
266 unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
267 unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000268 MachineOperand &RID = MI->getOperand(4);
269 MachineOperand &SID = MI->getOperand(5);
270 unsigned TextureId = MI->getOperand(6).getImm();
271 unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
272 unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
Tom Stellard75aadc22012-12-11 21:25:42 +0000273
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000274 switch (TextureId) {
275 case 5: // Rect
276 CTX = CTY = 0;
277 break;
278 case 6: // Shadow1D
279 SrcW = SrcZ;
280 break;
281 case 7: // Shadow2D
282 SrcW = SrcZ;
283 break;
284 case 8: // ShadowRect
285 CTX = CTY = 0;
286 SrcW = SrcZ;
287 break;
288 case 9: // 1DArray
289 SrcZ = SrcY;
290 CTZ = 0;
291 break;
292 case 10: // 2DArray
293 CTZ = 0;
294 break;
295 case 11: // Shadow1DArray
296 SrcZ = SrcY;
297 CTZ = 0;
298 break;
299 case 12: // Shadow2DArray
300 CTZ = 0;
301 break;
302 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000303 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
304 .addOperand(MI->getOperand(3))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000305 .addImm(SrcX)
306 .addImm(SrcY)
307 .addImm(SrcZ)
308 .addImm(SrcW)
309 .addImm(0)
310 .addImm(0)
311 .addImm(0)
312 .addImm(0)
313 .addImm(1)
314 .addImm(2)
315 .addImm(3)
316 .addOperand(RID)
317 .addOperand(SID)
318 .addImm(CTX)
319 .addImm(CTY)
320 .addImm(CTZ)
321 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000322 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
323 .addOperand(MI->getOperand(2))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000324 .addImm(SrcX)
325 .addImm(SrcY)
326 .addImm(SrcZ)
327 .addImm(SrcW)
328 .addImm(0)
329 .addImm(0)
330 .addImm(0)
331 .addImm(0)
332 .addImm(1)
333 .addImm(2)
334 .addImm(3)
335 .addOperand(RID)
336 .addOperand(SID)
337 .addImm(CTX)
338 .addImm(CTY)
339 .addImm(CTZ)
340 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000341 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
342 .addOperand(MI->getOperand(0))
343 .addOperand(MI->getOperand(1))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000344 .addImm(SrcX)
345 .addImm(SrcY)
346 .addImm(SrcZ)
347 .addImm(SrcW)
348 .addImm(0)
349 .addImm(0)
350 .addImm(0)
351 .addImm(0)
352 .addImm(1)
353 .addImm(2)
354 .addImm(3)
355 .addOperand(RID)
356 .addOperand(SID)
357 .addImm(CTX)
358 .addImm(CTY)
359 .addImm(CTZ)
360 .addImm(CTW)
Tom Stellard75aadc22012-12-11 21:25:42 +0000361 .addReg(T0, RegState::Implicit)
362 .addReg(T1, RegState::Implicit);
363 break;
364 }
365
366 case AMDGPU::TXD_SHADOW: {
367 unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
368 unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000369 MachineOperand &RID = MI->getOperand(4);
370 MachineOperand &SID = MI->getOperand(5);
371 unsigned TextureId = MI->getOperand(6).getImm();
372 unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
373 unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
374
375 switch (TextureId) {
376 case 5: // Rect
377 CTX = CTY = 0;
378 break;
379 case 6: // Shadow1D
380 SrcW = SrcZ;
381 break;
382 case 7: // Shadow2D
383 SrcW = SrcZ;
384 break;
385 case 8: // ShadowRect
386 CTX = CTY = 0;
387 SrcW = SrcZ;
388 break;
389 case 9: // 1DArray
390 SrcZ = SrcY;
391 CTZ = 0;
392 break;
393 case 10: // 2DArray
394 CTZ = 0;
395 break;
396 case 11: // Shadow1DArray
397 SrcZ = SrcY;
398 CTZ = 0;
399 break;
400 case 12: // Shadow2DArray
401 CTZ = 0;
402 break;
403 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000404
405 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
406 .addOperand(MI->getOperand(3))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000407 .addImm(SrcX)
408 .addImm(SrcY)
409 .addImm(SrcZ)
410 .addImm(SrcW)
411 .addImm(0)
412 .addImm(0)
413 .addImm(0)
414 .addImm(0)
415 .addImm(1)
416 .addImm(2)
417 .addImm(3)
418 .addOperand(RID)
419 .addOperand(SID)
420 .addImm(CTX)
421 .addImm(CTY)
422 .addImm(CTZ)
423 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000424 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
425 .addOperand(MI->getOperand(2))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000426 .addImm(SrcX)
427 .addImm(SrcY)
428 .addImm(SrcZ)
429 .addImm(SrcW)
430 .addImm(0)
431 .addImm(0)
432 .addImm(0)
433 .addImm(0)
434 .addImm(1)
435 .addImm(2)
436 .addImm(3)
437 .addOperand(RID)
438 .addOperand(SID)
439 .addImm(CTX)
440 .addImm(CTY)
441 .addImm(CTZ)
442 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000443 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
444 .addOperand(MI->getOperand(0))
445 .addOperand(MI->getOperand(1))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000446 .addImm(SrcX)
447 .addImm(SrcY)
448 .addImm(SrcZ)
449 .addImm(SrcW)
450 .addImm(0)
451 .addImm(0)
452 .addImm(0)
453 .addImm(0)
454 .addImm(1)
455 .addImm(2)
456 .addImm(3)
457 .addOperand(RID)
458 .addOperand(SID)
459 .addImm(CTX)
460 .addImm(CTY)
461 .addImm(CTZ)
462 .addImm(CTW)
Tom Stellard75aadc22012-12-11 21:25:42 +0000463 .addReg(T0, RegState::Implicit)
464 .addReg(T1, RegState::Implicit);
465 break;
466 }
467
468 case AMDGPU::BRANCH:
469 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000470 .addOperand(MI->getOperand(0));
Tom Stellard75aadc22012-12-11 21:25:42 +0000471 break;
472
473 case AMDGPU::BRANCH_COND_f32: {
474 MachineInstr *NewMI =
475 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
476 AMDGPU::PREDICATE_BIT)
477 .addOperand(MI->getOperand(1))
478 .addImm(OPCODE_IS_NOT_ZERO)
479 .addImm(0); // Flags
480 TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000481 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
Tom Stellard75aadc22012-12-11 21:25:42 +0000482 .addOperand(MI->getOperand(0))
483 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
484 break;
485 }
486
487 case AMDGPU::BRANCH_COND_i32: {
488 MachineInstr *NewMI =
489 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
490 AMDGPU::PREDICATE_BIT)
491 .addOperand(MI->getOperand(1))
492 .addImm(OPCODE_IS_NOT_ZERO_INT)
493 .addImm(0); // Flags
494 TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000495 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
Tom Stellard75aadc22012-12-11 21:25:42 +0000496 .addOperand(MI->getOperand(0))
497 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
498 break;
499 }
500
Tom Stellard75aadc22012-12-11 21:25:42 +0000501 case AMDGPU::EG_ExportSwz:
502 case AMDGPU::R600_ExportSwz: {
Tom Stellard6f1b8652013-01-23 21:39:49 +0000503 // Instruction is left unmodified if its not the last one of its type
504 bool isLastInstructionOfItsType = true;
505 unsigned InstExportType = MI->getOperand(1).getImm();
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +0000506 for (MachineBasicBlock::iterator NextExportInst = std::next(I),
Tom Stellard6f1b8652013-01-23 21:39:49 +0000507 EndBlock = BB->end(); NextExportInst != EndBlock;
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +0000508 NextExportInst = std::next(NextExportInst)) {
Tom Stellard6f1b8652013-01-23 21:39:49 +0000509 if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
510 NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
511 unsigned CurrentInstExportType = NextExportInst->getOperand(1)
512 .getImm();
513 if (CurrentInstExportType == InstExportType) {
514 isLastInstructionOfItsType = false;
515 break;
516 }
517 }
518 }
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +0000519 bool EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
Tom Stellard6f1b8652013-01-23 21:39:49 +0000520 if (!EOP && !isLastInstructionOfItsType)
Tom Stellard75aadc22012-12-11 21:25:42 +0000521 return BB;
522 unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
523 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
524 .addOperand(MI->getOperand(0))
525 .addOperand(MI->getOperand(1))
526 .addOperand(MI->getOperand(2))
527 .addOperand(MI->getOperand(3))
528 .addOperand(MI->getOperand(4))
529 .addOperand(MI->getOperand(5))
530 .addOperand(MI->getOperand(6))
531 .addImm(CfInst)
Tom Stellard6f1b8652013-01-23 21:39:49 +0000532 .addImm(EOP);
Tom Stellard75aadc22012-12-11 21:25:42 +0000533 break;
534 }
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000535 case AMDGPU::RETURN: {
536 // RETURN instructions must have the live-out registers as implicit uses,
537 // otherwise they appear dead.
538 R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
539 MachineInstrBuilder MIB(*MF, MI);
540 for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
541 MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
542 return BB;
543 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000544 }
545
546 MI->eraseFromParent();
547 return BB;
548}
549
550//===----------------------------------------------------------------------===//
551// Custom DAG Lowering Operations
552//===----------------------------------------------------------------------===//
553
Tom Stellard75aadc22012-12-11 21:25:42 +0000554SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000555 MachineFunction &MF = DAG.getMachineFunction();
556 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard75aadc22012-12-11 21:25:42 +0000557 switch (Op.getOpcode()) {
558 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Tom Stellard880a80a2014-06-17 16:53:14 +0000559 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
560 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
Jan Vesely25f36272014-06-18 12:27:13 +0000561 case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
Jan Veselyecf51332014-06-18 12:27:17 +0000562 case ISD::SRA_PARTS:
Jan Vesely900ff2e2014-06-18 12:27:15 +0000563 case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000564 case ISD::FCOS:
565 case ISD::FSIN: return LowerTrig(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000566 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000567 case ISD::STORE: return LowerSTORE(Op, DAG);
Tom Stellard365366f2013-01-23 02:09:06 +0000568 case ISD::LOAD: return LowerLOAD(Op, DAG);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000569 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000570 case ISD::INTRINSIC_VOID: {
571 SDValue Chain = Op.getOperand(0);
572 unsigned IntrinsicID =
573 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
574 switch (IntrinsicID) {
575 case AMDGPUIntrinsic::AMDGPU_store_output: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000576 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
577 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000578 MFI->LiveOuts.push_back(Reg);
Andrew Trickef9de2a2013-05-25 02:42:55 +0000579 return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
Tom Stellard75aadc22012-12-11 21:25:42 +0000580 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000581 case AMDGPUIntrinsic::R600_store_swizzle: {
582 const SDValue Args[8] = {
583 Chain,
584 Op.getOperand(2), // Export Value
585 Op.getOperand(3), // ArrayBase
586 Op.getOperand(4), // Type
587 DAG.getConstant(0, MVT::i32), // SWZ_X
588 DAG.getConstant(1, MVT::i32), // SWZ_Y
589 DAG.getConstant(2, MVT::i32), // SWZ_Z
590 DAG.getConstant(3, MVT::i32) // SWZ_W
591 };
Craig Topper48d114b2014-04-26 18:35:24 +0000592 return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(), Args);
Tom Stellard75aadc22012-12-11 21:25:42 +0000593 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000594
Tom Stellard75aadc22012-12-11 21:25:42 +0000595 // default for switch(IntrinsicID)
596 default: break;
597 }
598 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
599 break;
600 }
601 case ISD::INTRINSIC_WO_CHAIN: {
602 unsigned IntrinsicID =
603 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
604 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000605 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000606 switch(IntrinsicID) {
607 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000608 case AMDGPUIntrinsic::R600_load_input: {
609 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
610 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
611 MachineFunction &MF = DAG.getMachineFunction();
612 MachineRegisterInfo &MRI = MF.getRegInfo();
613 MRI.addLiveIn(Reg);
614 return DAG.getCopyFromReg(DAG.getEntryNode(),
615 SDLoc(DAG.getEntryNode()), Reg, VT);
616 }
617
618 case AMDGPUIntrinsic::R600_interp_input: {
619 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
620 int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
621 MachineSDNode *interp;
622 if (ijb < 0) {
623 const MachineFunction &MF = DAG.getMachineFunction();
624 const R600InstrInfo *TII =
625 static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
626 interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
627 MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
628 return DAG.getTargetExtractSubreg(
629 TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
630 DL, MVT::f32, SDValue(interp, 0));
631 }
632 MachineFunction &MF = DAG.getMachineFunction();
633 MachineRegisterInfo &MRI = MF.getRegInfo();
634 unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
635 unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
636 MRI.addLiveIn(RegisterI);
637 MRI.addLiveIn(RegisterJ);
638 SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
639 SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
640 SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
641 SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);
642
643 if (slot % 4 < 2)
644 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
645 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
646 RegisterJNode, RegisterINode);
647 else
648 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
649 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
650 RegisterJNode, RegisterINode);
651 return SDValue(interp, slot % 2);
652 }
Vincent Lejeunef143af32013-11-11 22:10:24 +0000653 case AMDGPUIntrinsic::R600_interp_xy:
654 case AMDGPUIntrinsic::R600_interp_zw: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000655 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Tom Stellard41afe6a2013-02-05 17:09:14 +0000656 MachineSDNode *interp;
Vincent Lejeunef143af32013-11-11 22:10:24 +0000657 SDValue RegisterINode = Op.getOperand(2);
658 SDValue RegisterJNode = Op.getOperand(3);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000659
Vincent Lejeunef143af32013-11-11 22:10:24 +0000660 if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
Tom Stellard41afe6a2013-02-05 17:09:14 +0000661 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
Vincent Lejeunef143af32013-11-11 22:10:24 +0000662 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000663 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000664 else
665 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
Vincent Lejeunef143af32013-11-11 22:10:24 +0000666 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000667 RegisterJNode, RegisterINode);
Vincent Lejeunef143af32013-11-11 22:10:24 +0000668 return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
669 SDValue(interp, 0), SDValue(interp, 1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000670 }
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000671 case AMDGPUIntrinsic::R600_tex:
672 case AMDGPUIntrinsic::R600_texc:
673 case AMDGPUIntrinsic::R600_txl:
674 case AMDGPUIntrinsic::R600_txlc:
675 case AMDGPUIntrinsic::R600_txb:
676 case AMDGPUIntrinsic::R600_txbc:
677 case AMDGPUIntrinsic::R600_txf:
678 case AMDGPUIntrinsic::R600_txq:
679 case AMDGPUIntrinsic::R600_ddx:
Vincent Lejeune6df39432013-10-02 16:00:33 +0000680 case AMDGPUIntrinsic::R600_ddy:
681 case AMDGPUIntrinsic::R600_ldptr: {
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000682 unsigned TextureOp;
683 switch (IntrinsicID) {
684 case AMDGPUIntrinsic::R600_tex:
685 TextureOp = 0;
686 break;
687 case AMDGPUIntrinsic::R600_texc:
688 TextureOp = 1;
689 break;
690 case AMDGPUIntrinsic::R600_txl:
691 TextureOp = 2;
692 break;
693 case AMDGPUIntrinsic::R600_txlc:
694 TextureOp = 3;
695 break;
696 case AMDGPUIntrinsic::R600_txb:
697 TextureOp = 4;
698 break;
699 case AMDGPUIntrinsic::R600_txbc:
700 TextureOp = 5;
701 break;
702 case AMDGPUIntrinsic::R600_txf:
703 TextureOp = 6;
704 break;
705 case AMDGPUIntrinsic::R600_txq:
706 TextureOp = 7;
707 break;
708 case AMDGPUIntrinsic::R600_ddx:
709 TextureOp = 8;
710 break;
711 case AMDGPUIntrinsic::R600_ddy:
712 TextureOp = 9;
713 break;
Vincent Lejeune6df39432013-10-02 16:00:33 +0000714 case AMDGPUIntrinsic::R600_ldptr:
715 TextureOp = 10;
716 break;
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000717 default:
718 llvm_unreachable("Unknow Texture Operation");
719 }
720
721 SDValue TexArgs[19] = {
722 DAG.getConstant(TextureOp, MVT::i32),
723 Op.getOperand(1),
724 DAG.getConstant(0, MVT::i32),
725 DAG.getConstant(1, MVT::i32),
726 DAG.getConstant(2, MVT::i32),
727 DAG.getConstant(3, MVT::i32),
728 Op.getOperand(2),
729 Op.getOperand(3),
730 Op.getOperand(4),
731 DAG.getConstant(0, MVT::i32),
732 DAG.getConstant(1, MVT::i32),
733 DAG.getConstant(2, MVT::i32),
734 DAG.getConstant(3, MVT::i32),
735 Op.getOperand(5),
736 Op.getOperand(6),
737 Op.getOperand(7),
738 Op.getOperand(8),
739 Op.getOperand(9),
740 Op.getOperand(10)
741 };
Craig Topper48d114b2014-04-26 18:35:24 +0000742 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000743 }
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000744 case AMDGPUIntrinsic::AMDGPU_dp4: {
745 SDValue Args[8] = {
746 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
747 DAG.getConstant(0, MVT::i32)),
748 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
749 DAG.getConstant(0, MVT::i32)),
750 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
751 DAG.getConstant(1, MVT::i32)),
752 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
753 DAG.getConstant(1, MVT::i32)),
754 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
755 DAG.getConstant(2, MVT::i32)),
756 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
757 DAG.getConstant(2, MVT::i32)),
758 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
759 DAG.getConstant(3, MVT::i32)),
760 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
761 DAG.getConstant(3, MVT::i32))
762 };
Craig Topper48d114b2014-04-26 18:35:24 +0000763 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000764 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000765
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000766 case Intrinsic::r600_read_ngroups_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000767 return LowerImplicitParameter(DAG, VT, DL, 0);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000768 case Intrinsic::r600_read_ngroups_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000769 return LowerImplicitParameter(DAG, VT, DL, 1);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000770 case Intrinsic::r600_read_ngroups_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000771 return LowerImplicitParameter(DAG, VT, DL, 2);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000772 case Intrinsic::r600_read_global_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000773 return LowerImplicitParameter(DAG, VT, DL, 3);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000774 case Intrinsic::r600_read_global_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000775 return LowerImplicitParameter(DAG, VT, DL, 4);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000776 case Intrinsic::r600_read_global_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000777 return LowerImplicitParameter(DAG, VT, DL, 5);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000778 case Intrinsic::r600_read_local_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000779 return LowerImplicitParameter(DAG, VT, DL, 6);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000780 case Intrinsic::r600_read_local_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000781 return LowerImplicitParameter(DAG, VT, DL, 7);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000782 case Intrinsic::r600_read_local_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000783 return LowerImplicitParameter(DAG, VT, DL, 8);
784
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000785 case Intrinsic::r600_read_tgid_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000786 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
787 AMDGPU::T1_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000788 case Intrinsic::r600_read_tgid_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000789 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
790 AMDGPU::T1_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000791 case Intrinsic::r600_read_tgid_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000792 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
793 AMDGPU::T1_Z, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000794 case Intrinsic::r600_read_tidig_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000795 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
796 AMDGPU::T0_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000797 case Intrinsic::r600_read_tidig_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000798 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
799 AMDGPU::T0_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000800 case Intrinsic::r600_read_tidig_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000801 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
802 AMDGPU::T0_Z, VT);
803 }
804 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
805 break;
806 }
807 } // end switch(Op.getOpcode())
808 return SDValue();
809}
810
811void R600TargetLowering::ReplaceNodeResults(SDNode *N,
812 SmallVectorImpl<SDValue> &Results,
813 SelectionDAG &DAG) const {
814 switch (N->getOpcode()) {
Matt Arsenaultd125d742014-03-27 17:23:24 +0000815 default:
816 AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
817 return;
Tom Stellard75aadc22012-12-11 21:25:42 +0000818 case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
Tom Stellard365366f2013-01-23 02:09:06 +0000819 return;
820 case ISD::LOAD: {
821 SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
822 Results.push_back(SDValue(Node, 0));
823 Results.push_back(SDValue(Node, 1));
824 // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
825 // function
826 DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
827 return;
828 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000829 case ISD::STORE:
830 SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
831 Results.push_back(SDValue(Node, 0));
832 return;
Tom Stellard75aadc22012-12-11 21:25:42 +0000833 }
834}
835
Tom Stellard880a80a2014-06-17 16:53:14 +0000836SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
837 SDValue Vector) const {
838
839 SDLoc DL(Vector);
840 EVT VecVT = Vector.getValueType();
841 EVT EltVT = VecVT.getVectorElementType();
842 SmallVector<SDValue, 8> Args;
843
844 for (unsigned i = 0, e = VecVT.getVectorNumElements();
845 i != e; ++i) {
846 Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
847 Vector, DAG.getConstant(i, getVectorIdxTy())));
848 }
849
850 return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
851}
852
853SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
854 SelectionDAG &DAG) const {
855
856 SDLoc DL(Op);
857 SDValue Vector = Op.getOperand(0);
858 SDValue Index = Op.getOperand(1);
859
860 if (isa<ConstantSDNode>(Index) ||
861 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
862 return Op;
863
864 Vector = vectorToVerticalVector(DAG, Vector);
865 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
866 Vector, Index);
867}
868
869SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
870 SelectionDAG &DAG) const {
871 SDLoc DL(Op);
872 SDValue Vector = Op.getOperand(0);
873 SDValue Value = Op.getOperand(1);
874 SDValue Index = Op.getOperand(2);
875
876 if (isa<ConstantSDNode>(Index) ||
877 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
878 return Op;
879
880 Vector = vectorToVerticalVector(DAG, Vector);
881 SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
882 Vector, Value, Index);
883 return vectorToVerticalVector(DAG, Insert);
884}
885
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000886SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
887 // On hw >= R700, COS/SIN input must be between -1. and 1.
888 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
889 EVT VT = Op.getValueType();
890 SDValue Arg = Op.getOperand(0);
891 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
892 DAG.getNode(ISD::FADD, SDLoc(Op), VT,
893 DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
894 DAG.getConstantFP(0.15915494309, MVT::f32)),
895 DAG.getConstantFP(0.5, MVT::f32)));
896 unsigned TrigNode;
897 switch (Op.getOpcode()) {
898 case ISD::FCOS:
899 TrigNode = AMDGPUISD::COS_HW;
900 break;
901 case ISD::FSIN:
902 TrigNode = AMDGPUISD::SIN_HW;
903 break;
904 default:
905 llvm_unreachable("Wrong trig opcode");
906 }
907 SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
908 DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
909 DAG.getConstantFP(-0.5, MVT::f32)));
910 if (Gen >= AMDGPUSubtarget::R700)
911 return TrigVal;
912 // On R600 hw, COS/SIN input must be between -Pi and Pi.
913 return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
914 DAG.getConstantFP(3.14159265359, MVT::f32));
915}
916
Jan Vesely25f36272014-06-18 12:27:13 +0000917SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
918 SDLoc DL(Op);
919 EVT VT = Op.getValueType();
920
921 SDValue Lo = Op.getOperand(0);
922 SDValue Hi = Op.getOperand(1);
923 SDValue Shift = Op.getOperand(2);
924 SDValue Zero = DAG.getConstant(0, VT);
925 SDValue One = DAG.getConstant(1, VT);
926
927 SDValue Width = DAG.getConstant(VT.getSizeInBits(), VT);
928 SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT);
929 SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
930 SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
931
932 // The dance around Width1 is necessary for 0 special case.
933 // Without it the CompShift might be 32, producing incorrect results in
934 // Overflow. So we do the shift in two steps, the alternative is to
935 // add a conditional to filter the special case.
936
937 SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
938 Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);
939
940 SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
941 HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
942 SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);
943
944 SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
945 SDValue LoBig = Zero;
946
947 Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
948 Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
949
950 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
951}
952
Jan Vesely900ff2e2014-06-18 12:27:15 +0000953SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
954 SDLoc DL(Op);
955 EVT VT = Op.getValueType();
956
957 SDValue Lo = Op.getOperand(0);
958 SDValue Hi = Op.getOperand(1);
959 SDValue Shift = Op.getOperand(2);
960 SDValue Zero = DAG.getConstant(0, VT);
961 SDValue One = DAG.getConstant(1, VT);
962
Jan Veselyecf51332014-06-18 12:27:17 +0000963 const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;
964
Jan Vesely900ff2e2014-06-18 12:27:15 +0000965 SDValue Width = DAG.getConstant(VT.getSizeInBits(), VT);
966 SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT);
967 SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
968 SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
969
970 // The dance around Width1 is necessary for 0 special case.
971 // Without it the CompShift might be 32, producing incorrect results in
972 // Overflow. So we do the shift in two steps, the alternative is to
973 // add a conditional to filter the special case.
974
975 SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
976 Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);
977
Jan Veselyecf51332014-06-18 12:27:17 +0000978 SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
Jan Vesely900ff2e2014-06-18 12:27:15 +0000979 SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
980 LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);
981
Jan Veselyecf51332014-06-18 12:27:17 +0000982 SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
983 SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;
Jan Vesely900ff2e2014-06-18 12:27:15 +0000984
985 Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
986 Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
987
988 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
989}
990
Tom Stellard75aadc22012-12-11 21:25:42 +0000991SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
992 return DAG.getNode(
993 ISD::SETCC,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000994 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +0000995 MVT::i1,
996 Op, DAG.getConstantFP(0.0f, MVT::f32),
997 DAG.getCondCode(ISD::SETNE)
998 );
999}
1000
Tom Stellard75aadc22012-12-11 21:25:42 +00001001SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001002 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001003 unsigned DwordOffset) const {
1004 unsigned ByteOffset = DwordOffset * 4;
1005 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +00001006 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +00001007
1008 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
1009 assert(isInt<16>(ByteOffset));
1010
1011 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
1012 DAG.getConstant(ByteOffset, MVT::i32), // PTR
1013 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
1014 false, false, false, 0);
1015}
1016
Tom Stellard75aadc22012-12-11 21:25:42 +00001017bool R600TargetLowering::isZero(SDValue Op) const {
1018 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
1019 return Cst->isNullValue();
1020 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
1021 return CstFP->isZero();
1022 } else {
1023 return false;
1024 }
1025}
1026
/// Custom lowering for SELECT_CC.
///
/// Massages the operands, in order of preference, so the node matches one of
/// the native R600 patterns (SET* or CND*).  If neither form is reachable,
/// the node is rewritten as two SELECT_CC operations that are each natively
/// selectable.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
     ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  // If True/False are reversed relative to the hardware values, invert the
  // condition; if the inverted condition is not legal, additionally swap the
  // compare operands so a legal swapped-inverse condition can be used.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        // Inverting the condition also requires swapping True/False.
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      // Express not-equal conditions by inverting the condition and swapping
      // the select operands.
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
1161
Alp Tokercb402912014-01-24 17:20:08 +00001162/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001163/// convert these pointers to a register index. Each register holds
1164/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
1165/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
1166/// for indirect addressing.
1167SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1168 unsigned StackWidth,
1169 SelectionDAG &DAG) const {
1170 unsigned SRLPad;
1171 switch(StackWidth) {
1172 case 1:
1173 SRLPad = 2;
1174 break;
1175 case 2:
1176 SRLPad = 3;
1177 break;
1178 case 4:
1179 SRLPad = 4;
1180 break;
1181 default: llvm_unreachable("Invalid stack width");
1182 }
1183
Andrew Trickef9de2a2013-05-25 02:42:55 +00001184 return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001185 DAG.getConstant(SRLPad, MVT::i32));
1186}
1187
/// Map the flat element index \p ElemIdx of an indirect private access onto
/// a register channel and a pointer increment, for a stack that uses
/// \p StackWidth sub-registers per slot.
///
/// \param[out] Channel  sub-register channel that holds the element.
/// \param[out] PtrIncr  increment to apply to the register index *relative to
///                      the previous element* — callers (LowerSTORE/LowerLOAD)
///                      walk elements in order and add PtrIncr cumulatively,
///                      which is why only the element that crosses a slot
///                      boundary reports 1.
void R600TargetLowering::getStackAddress(unsigned StackWidth,
                                         unsigned ElemIdx,
                                         unsigned &Channel,
                                         unsigned &PtrIncr) const {
  switch (StackWidth) {
  default:
  case 1:
    // One channel per slot: every element after the first starts a new slot.
    Channel = 0;
    if (ElemIdx > 0) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 2:
    // Two channels per slot: elements 0,1 share slot 0; elements 2,3 share
    // slot 1, so the boundary is crossed exactly once, at ElemIdx == 2.
    Channel = ElemIdx % 2;
    if (ElemIdx == 2) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 4:
    // Four channels per slot: all elements fit in a single register.
    Channel = ElemIdx;
    PtrIncr = 0;
    break;
  }
}
1216
Tom Stellard75aadc22012-12-11 21:25:42 +00001217SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001218 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001219 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1220 SDValue Chain = Op.getOperand(0);
1221 SDValue Value = Op.getOperand(1);
1222 SDValue Ptr = Op.getOperand(2);
1223
Tom Stellard2ffc3302013-08-26 15:05:44 +00001224 SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
Tom Stellardfbab8272013-08-16 01:12:11 +00001225 if (Result.getNode()) {
1226 return Result;
1227 }
1228
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001229 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
1230 if (StoreNode->isTruncatingStore()) {
1231 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001232 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001233 EVT MemVT = StoreNode->getMemoryVT();
1234 SDValue MaskConstant;
1235 if (MemVT == MVT::i8) {
1236 MaskConstant = DAG.getConstant(0xFF, MVT::i32);
1237 } else {
1238 assert(MemVT == MVT::i16);
1239 MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
1240 }
1241 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
1242 DAG.getConstant(2, MVT::i32));
1243 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
1244 DAG.getConstant(0x00000003, VT));
1245 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1246 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
1247 DAG.getConstant(3, VT));
1248 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1249 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
1250 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1251 // vector instead.
1252 SDValue Src[4] = {
1253 ShiftedValue,
1254 DAG.getConstant(0, MVT::i32),
1255 DAG.getConstant(0, MVT::i32),
1256 Mask
1257 };
Craig Topper48d114b2014-04-26 18:35:24 +00001258 SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001259 SDValue Args[3] = { Chain, Input, DWordAddr };
1260 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
Craig Topper206fcd42014-04-26 19:29:41 +00001261 Op->getVTList(), Args, MemVT,
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001262 StoreNode->getMemOperand());
1263 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
1264 Value.getValueType().bitsGE(MVT::i32)) {
1265 // Convert pointer from byte address to dword address.
1266 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1267 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
1268 Ptr, DAG.getConstant(2, MVT::i32)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001269
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001270 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
Matt Arsenaulteaa3a7e2013-12-10 21:37:42 +00001271 llvm_unreachable("Truncated and indexed stores not supported yet");
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001272 } else {
1273 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1274 }
1275 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001276 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001277 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001278
1279 EVT ValueVT = Value.getValueType();
1280
1281 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1282 return SDValue();
1283 }
1284
Tom Stellarde9373602014-01-22 19:24:14 +00001285 SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
1286 if (Ret.getNode()) {
1287 return Ret;
1288 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001289 // Lowering for indirect addressing
1290
1291 const MachineFunction &MF = DAG.getMachineFunction();
1292 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1293 getTargetMachine().getFrameLowering());
1294 unsigned StackWidth = TFL->getStackWidth(MF);
1295
1296 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1297
1298 if (ValueVT.isVector()) {
1299 unsigned NumElemVT = ValueVT.getVectorNumElements();
1300 EVT ElemVT = ValueVT.getVectorElementType();
Craig Topper48d114b2014-04-26 18:35:24 +00001301 SmallVector<SDValue, 4> Stores(NumElemVT);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001302
1303 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1304 "vector width in load");
1305
1306 for (unsigned i = 0; i < NumElemVT; ++i) {
1307 unsigned Channel, PtrIncr;
1308 getStackAddress(StackWidth, i, Channel, PtrIncr);
1309 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1310 DAG.getConstant(PtrIncr, MVT::i32));
1311 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
1312 Value, DAG.getConstant(i, MVT::i32));
1313
1314 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1315 Chain, Elem, Ptr,
1316 DAG.getTargetConstant(Channel, MVT::i32));
1317 }
Craig Topper48d114b2014-04-26 18:35:24 +00001318 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001319 } else {
1320 if (ValueVT == MVT::i8) {
1321 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1322 }
1323 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
NAKAMURA Takumi18ca09c2013-05-22 06:37:25 +00001324 DAG.getTargetConstant(0, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001325 }
1326
1327 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001328}
1329
Tom Stellard365366f2013-01-23 02:09:06 +00001330// return (512 + (kc_bank << 12)
1331static int
1332ConstantAddressBlock(unsigned AddressSpace) {
1333 switch (AddressSpace) {
1334 case AMDGPUAS::CONSTANT_BUFFER_0:
1335 return 512;
1336 case AMDGPUAS::CONSTANT_BUFFER_1:
1337 return 512 + 4096;
1338 case AMDGPUAS::CONSTANT_BUFFER_2:
1339 return 512 + 4096 * 2;
1340 case AMDGPUAS::CONSTANT_BUFFER_3:
1341 return 512 + 4096 * 3;
1342 case AMDGPUAS::CONSTANT_BUFFER_4:
1343 return 512 + 4096 * 4;
1344 case AMDGPUAS::CONSTANT_BUFFER_5:
1345 return 512 + 4096 * 5;
1346 case AMDGPUAS::CONSTANT_BUFFER_6:
1347 return 512 + 4096 * 6;
1348 case AMDGPUAS::CONSTANT_BUFFER_7:
1349 return 512 + 4096 * 7;
1350 case AMDGPUAS::CONSTANT_BUFFER_8:
1351 return 512 + 4096 * 8;
1352 case AMDGPUAS::CONSTANT_BUFFER_9:
1353 return 512 + 4096 * 9;
1354 case AMDGPUAS::CONSTANT_BUFFER_10:
1355 return 512 + 4096 * 10;
1356 case AMDGPUAS::CONSTANT_BUFFER_11:
1357 return 512 + 4096 * 11;
1358 case AMDGPUAS::CONSTANT_BUFFER_12:
1359 return 512 + 4096 * 12;
1360 case AMDGPUAS::CONSTANT_BUFFER_13:
1361 return 512 + 4096 * 13;
1362 case AMDGPUAS::CONSTANT_BUFFER_14:
1363 return 512 + 4096 * 14;
1364 case AMDGPUAS::CONSTANT_BUFFER_15:
1365 return 512 + 4096 * 15;
1366 default:
1367 return -1;
1368 }
1369}
1370
1371SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
1372{
1373 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001374 SDLoc DL(Op);
Tom Stellard365366f2013-01-23 02:09:06 +00001375 LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
1376 SDValue Chain = Op.getOperand(0);
1377 SDValue Ptr = Op.getOperand(1);
1378 SDValue LoweredLoad;
1379
Tom Stellarde9373602014-01-22 19:24:14 +00001380 SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
1381 if (Ret.getNode()) {
Matt Arsenault7939acd2014-04-07 16:44:24 +00001382 SDValue Ops[2] = {
1383 Ret,
1384 Chain
1385 };
Craig Topper64941d92014-04-27 19:20:57 +00001386 return DAG.getMergeValues(Ops, DL);
Tom Stellarde9373602014-01-22 19:24:14 +00001387 }
1388
1389
Tom Stellard35bb18c2013-08-26 15:06:04 +00001390 if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
1391 SDValue MergedValues[2] = {
1392 SplitVectorLoad(Op, DAG),
1393 Chain
1394 };
Craig Topper64941d92014-04-27 19:20:57 +00001395 return DAG.getMergeValues(MergedValues, DL);
Tom Stellard35bb18c2013-08-26 15:06:04 +00001396 }
1397
Tom Stellard365366f2013-01-23 02:09:06 +00001398 int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
Matt Arsenault00a0d6f2013-11-13 02:39:07 +00001399 if (ConstantBlock > -1 &&
1400 ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
1401 (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
Tom Stellard365366f2013-01-23 02:09:06 +00001402 SDValue Result;
Nick Lewyckyaad475b2014-04-15 07:22:52 +00001403 if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
1404 isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
Matt Arsenaultef1a9502013-11-01 17:39:26 +00001405 isa<ConstantSDNode>(Ptr)) {
Tom Stellard365366f2013-01-23 02:09:06 +00001406 SDValue Slots[4];
1407 for (unsigned i = 0; i < 4; i++) {
1408 // We want Const position encoded with the following formula :
1409 // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
1410 // const_index is Ptr computed by llvm using an alignment of 16.
1411 // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
1412 // then div by 4 at the ISel step
1413 SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
1414 DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
1415 Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
1416 }
Tom Stellard0344cdf2013-08-01 15:23:42 +00001417 EVT NewVT = MVT::v4i32;
1418 unsigned NumElements = 4;
1419 if (VT.isVector()) {
1420 NewVT = VT;
1421 NumElements = VT.getVectorNumElements();
1422 }
Craig Topper48d114b2014-04-26 18:35:24 +00001423 Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT,
Craig Topper2d2aa0c2014-04-30 07:17:30 +00001424 makeArrayRef(Slots, NumElements));
Tom Stellard365366f2013-01-23 02:09:06 +00001425 } else {
Alp Tokerf907b892013-12-05 05:44:44 +00001426 // non-constant ptr can't be folded, keeps it as a v4f32 load
Tom Stellard365366f2013-01-23 02:09:06 +00001427 Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
Vincent Lejeune743dca02013-03-05 15:04:29 +00001428 DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
Christian Konig189357c2013-03-07 09:03:59 +00001429 DAG.getConstant(LoadNode->getAddressSpace() -
NAKAMURA Takumi18ca09c2013-05-22 06:37:25 +00001430 AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
Tom Stellard365366f2013-01-23 02:09:06 +00001431 );
1432 }
1433
1434 if (!VT.isVector()) {
1435 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
1436 DAG.getConstant(0, MVT::i32));
1437 }
1438
1439 SDValue MergedValues[2] = {
Matt Arsenault7939acd2014-04-07 16:44:24 +00001440 Result,
1441 Chain
Tom Stellard365366f2013-01-23 02:09:06 +00001442 };
Craig Topper64941d92014-04-27 19:20:57 +00001443 return DAG.getMergeValues(MergedValues, DL);
Tom Stellard365366f2013-01-23 02:09:06 +00001444 }
1445
Matt Arsenault909d0c02013-10-30 23:43:29 +00001446 // For most operations returning SDValue() will result in the node being
1447 // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
1448 // need to manually expand loads that may be legal in some address spaces and
1449 // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
1450 // compute shaders, since the data is sign extended when it is uploaded to the
1451 // buffer. However SEXT loads from other address spaces are not supported, so
1452 // we need to expand them here.
Tom Stellard84021442013-07-23 01:48:24 +00001453 if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
1454 EVT MemVT = LoadNode->getMemoryVT();
1455 assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
1456 SDValue ShiftAmount =
1457 DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
1458 SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
1459 LoadNode->getPointerInfo(), MemVT,
1460 LoadNode->isVolatile(),
1461 LoadNode->isNonTemporal(),
1462 LoadNode->getAlignment());
1463 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
1464 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);
1465
1466 SDValue MergedValues[2] = { Sra, Chain };
Craig Topper64941d92014-04-27 19:20:57 +00001467 return DAG.getMergeValues(MergedValues, DL);
Tom Stellard84021442013-07-23 01:48:24 +00001468 }
1469
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001470 if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1471 return SDValue();
1472 }
1473
1474 // Lowering for indirect addressing
1475 const MachineFunction &MF = DAG.getMachineFunction();
1476 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1477 getTargetMachine().getFrameLowering());
1478 unsigned StackWidth = TFL->getStackWidth(MF);
1479
1480 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1481
1482 if (VT.isVector()) {
1483 unsigned NumElemVT = VT.getVectorNumElements();
1484 EVT ElemVT = VT.getVectorElementType();
1485 SDValue Loads[4];
1486
1487 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1488 "vector width in load");
1489
1490 for (unsigned i = 0; i < NumElemVT; ++i) {
1491 unsigned Channel, PtrIncr;
1492 getStackAddress(StackWidth, i, Channel, PtrIncr);
1493 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1494 DAG.getConstant(PtrIncr, MVT::i32));
1495 Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
1496 Chain, Ptr,
1497 DAG.getTargetConstant(Channel, MVT::i32),
1498 Op.getOperand(2));
1499 }
1500 for (unsigned i = NumElemVT; i < 4; ++i) {
1501 Loads[i] = DAG.getUNDEF(ElemVT);
1502 }
1503 EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
Craig Topper48d114b2014-04-26 18:35:24 +00001504 LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001505 } else {
1506 LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
1507 Chain, Ptr,
1508 DAG.getTargetConstant(0, MVT::i32), // Channel
1509 Op.getOperand(2));
1510 }
1511
Matt Arsenault7939acd2014-04-07 16:44:24 +00001512 SDValue Ops[2] = {
1513 LoweredLoad,
1514 Chain
1515 };
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001516
Craig Topper64941d92014-04-27 19:20:57 +00001517 return DAG.getMergeValues(Ops, DL);
Tom Stellard365366f2013-01-23 02:09:06 +00001518}
Tom Stellard75aadc22012-12-11 21:25:42 +00001519
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
///
/// Lowers incoming formal arguments:
///  * Non-compute (pixel/vertex/geometry) shaders receive their inputs
///    pre-loaded into 128-bit registers, so each argument becomes a plain
///    live-in register copy.
///  * Compute kernels read their arguments from CONSTANT_BUFFER_0 with
///    sign-extending loads starting at byte offset 36 (the first 36 bytes
///    hold thread-group and global size information).
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      SDLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned ShaderType = MF.getInfo<R600MachineFunctionInfo>()->ShaderType;

  SmallVector<ISD::InputArg, 8> LocalIns;

  // Recover the pre-legalization argument types so that MemVT below reflects
  // the original (possibly narrower) in-memory type of each parameter rather
  // than the legalized register type.
  getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);

  AnalyzeFormalArguments(CCInfo, LocalIns);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    EVT VT = Ins[i].VT;         // Legalized register type.
    EVT MemVT = LocalIns[i].VT; // Original in-memory type.

    // Graphics shaders: the argument is already live in a vector register.
    if (ShaderType != ShaderType::COMPUTE) {
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    // Compute kernels: load the argument from constant buffer 0.
    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
    // being invalid. Somehow this seems to work with i64 arguments, but breaks
    // for <1 x i64>.

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.

    // FIXME: This should really check the extload type, but the handling of
    // extload vector parameters seems to be broken.
    //ISD::LoadExtType Ext = Ins[i].Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
    ISD::LoadExtType Ext = ISD::SEXTLOAD;
    // 4 is the preferred alignment for the CONSTANT memory space.
    SDValue Arg = DAG.getExtLoad(Ext, DL, VT, Chain,
                                 DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
                                 MachinePointerInfo(UndefValue::get(PtrTy)),
                                 MemVT, false, false, 4);

    InVals.push_back(Arg);
  }
  return Chain;
}
1579
Matt Arsenault758659232013-05-18 00:21:46 +00001580EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001581 if (!VT.isVector())
1582 return MVT::i32;
Tom Stellard75aadc22012-12-11 21:25:42 +00001583 return VT.changeVectorElementTypeToInteger();
1584}
1585
Matt Arsenault209a7b92014-04-18 07:40:20 +00001586static SDValue CompactSwizzlableVector(
1587 SelectionDAG &DAG, SDValue VectorEntry,
1588 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001589 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1590 assert(RemapSwizzle.empty());
1591 SDValue NewBldVec[4] = {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001592 VectorEntry.getOperand(0),
1593 VectorEntry.getOperand(1),
1594 VectorEntry.getOperand(2),
1595 VectorEntry.getOperand(3)
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001596 };
1597
1598 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeunefa58a5f2013-10-13 17:56:10 +00001599 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1600 // We mask write here to teach later passes that the ith element of this
1601 // vector is undef. Thus we can use it to reduce 128 bits reg usage,
1602 // break false dependencies and additionnaly make assembly easier to read.
1603 RemapSwizzle[i] = 7; // SEL_MASK_WRITE
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001604 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1605 if (C->isZero()) {
1606 RemapSwizzle[i] = 4; // SEL_0
1607 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1608 } else if (C->isExactlyValue(1.0)) {
1609 RemapSwizzle[i] = 5; // SEL_1
1610 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1611 }
1612 }
1613
1614 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1615 continue;
1616 for (unsigned j = 0; j < i; j++) {
1617 if (NewBldVec[i] == NewBldVec[j]) {
1618 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1619 RemapSwizzle[i] = j;
1620 break;
1621 }
1622 }
1623 }
1624
1625 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001626 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001627}
1628
Benjamin Kramer193960c2013-06-11 13:32:25 +00001629static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1630 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001631 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1632 assert(RemapSwizzle.empty());
1633 SDValue NewBldVec[4] = {
1634 VectorEntry.getOperand(0),
1635 VectorEntry.getOperand(1),
1636 VectorEntry.getOperand(2),
1637 VectorEntry.getOperand(3)
1638 };
1639 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001640 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001641 RemapSwizzle[i] = i;
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001642 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1643 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1644 ->getZExtValue();
1645 if (i == Idx)
1646 isUnmovable[Idx] = true;
1647 }
1648 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001649
1650 for (unsigned i = 0; i < 4; i++) {
1651 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1652 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1653 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001654 if (isUnmovable[Idx])
1655 continue;
1656 // Swap i and Idx
1657 std::swap(NewBldVec[Idx], NewBldVec[i]);
1658 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1659 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001660 }
1661 }
1662
1663 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001664 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001665}
1666
1667
1668SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1669SDValue Swz[4], SelectionDAG &DAG) const {
1670 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1671 // Old -> New swizzle values
1672 DenseMap<unsigned, unsigned> SwizzleRemap;
1673
1674 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1675 for (unsigned i = 0; i < 4; i++) {
1676 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1677 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1678 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1679 }
1680
1681 SwizzleRemap.clear();
1682 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1683 for (unsigned i = 0; i < 4; i++) {
1684 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1685 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1686 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1687 }
1688
1689 return BuildVector;
1690}
1691
1692
Tom Stellard75aadc22012-12-11 21:25:42 +00001693//===----------------------------------------------------------------------===//
1694// Custom DAG Optimizations
1695//===----------------------------------------------------------------------===//
1696
/// Target-specific DAG combines for R600. Unhandled opcodes are forwarded to
/// the generic AMDGPU combiner (both through the switch default and through
/// the trailing call at the end of the function).
SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
  case ISD::FP_ROUND: {
      SDValue Arg = N->getOperand(0);
      if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
        return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
                           Arg.getOperand(0));
      }
      break;
    }

  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
  // (i32 select_cc f32, f32, -1, 0 cc)
  //
  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
  // this to one of the SET*_DX10 instructions.
  case ISD::FP_TO_SINT: {
    SDValue FNeg = N->getOperand(0);
    if (FNeg.getOpcode() != ISD::FNEG) {
      return SDValue();
    }
    SDValue SelectCC = FNeg.getOperand(0);
    if (SelectCC.getOpcode() != ISD::SELECT_CC ||
        SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
        SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
        !isHWTrueValue(SelectCC.getOperand(2)) ||
        !isHWFalseValue(SelectCC.getOperand(3))) {
      return SDValue();
    }

    return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
                           SelectCC.getOperand(0), // LHS
                           SelectCC.getOperand(1), // RHS
                           DAG.getConstant(-1, MVT::i32), // True
                           DAG.getConstant(0, MVT::i32), // False
                           SelectCC.getOperand(4)); // CC

    break; // NOTE(review): unreachable — the return above always fires.
  }

  // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
  // => build_vector elt0, ... , NewEltIdx, ... , eltN
  case ISD::INSERT_VECTOR_ELT: {
    SDValue InVec = N->getOperand(0);
    SDValue InVal = N->getOperand(1);
    SDValue EltNo = N->getOperand(2);
    SDLoc dl(N);

    // If the inserted element is an UNDEF, just use the input vector.
    if (InVal.getOpcode() == ISD::UNDEF)
      return InVec;

    EVT VT = InVec.getValueType();

    // If we can't generate a legal BUILD_VECTOR, exit
    if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
      return SDValue();

    // Check that we know which element is being inserted
    if (!isa<ConstantSDNode>(EltNo))
      return SDValue();
    unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

    // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
    // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
    // vector elements.
    SmallVector<SDValue, 8> Ops;
    if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
      Ops.append(InVec.getNode()->op_begin(),
                 InVec.getNode()->op_end());
    } else if (InVec.getOpcode() == ISD::UNDEF) {
      unsigned NElts = VT.getVectorNumElements();
      Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
    } else {
      return SDValue();
    }

    // Insert the element
    if (Elt < Ops.size()) {
      // All the operands of BUILD_VECTOR must have the same type;
      // we enforce that here.
      EVT OpVT = Ops[0].getValueType();
      if (InVal.getValueType() != OpVT)
        InVal = OpVT.bitsGT(InVal.getValueType()) ?
          DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
          DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
      Ops[Elt] = InVal;
    }

    // Return the new vector
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
  }

  // Extract_vec (Build_vector) generated by custom lowering
  // also needs to be customly combined
  case ISD::EXTRACT_VECTOR_ELT: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return Arg->getOperand(Element);
      }
    }
    if (Arg.getOpcode() == ISD::BITCAST &&
        Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
            Arg->getOperand(0).getOperand(Element));
      }
    }
  }
  // NOTE(review): no break above — when neither EXTRACT_VECTOR_ELT pattern
  // matches, control falls through into the SELECT_CC case. This looks
  // unintended (the SELECT_CC code re-runs the generic combine and then
  // bails on the opcode checks); confirm before adding a break.

  case ISD::SELECT_CC: {
    // Try common optimizations
    SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
    if (Ret.getNode())
      return Ret;

    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
    // selectcc x, y, a, b, inv(cc)
    //
    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
    // selectcc x, y, a, b, cc
    SDValue LHS = N->getOperand(0);
    if (LHS.getOpcode() != ISD::SELECT_CC) {
      return SDValue();
    }

    SDValue RHS = N->getOperand(1);
    SDValue True = N->getOperand(2);
    SDValue False = N->getOperand(3);
    ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();

    // The outer select must compare the inner select's result against the
    // inner select's own false value, with matching true/false arms.
    if (LHS.getOperand(2).getNode() != True.getNode() ||
        LHS.getOperand(3).getNode() != False.getNode() ||
        RHS.getNode() != False.getNode()) {
      return SDValue();
    }

    switch (NCC) {
    default: return SDValue();
    case ISD::SETNE: return LHS;
    case ISD::SETEQ: {
      ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
      LHSCC = ISD::getSetCCInverse(LHSCC,
                                  LHS.getOperand(0).getValueType().isInteger());
      // Only fold if the inverted condition code is still legal (or we are
      // before legalization, when any condition code is acceptable).
      if (DCI.isBeforeLegalizeOps() ||
          isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
        return DAG.getSelectCC(SDLoc(N),
                               LHS.getOperand(0),
                               LHS.getOperand(1),
                               LHS.getOperand(2),
                               LHS.getOperand(3),
                               LHSCC);
      break;
    }
    }
    return SDValue();
  }

  case AMDGPUISD::EXPORT: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    // Operands 4..7 are the SWZ_X..SWZ_W swizzle selects that OptimizeSwizzle
    // rewrites in place alongside the exported vector.
    SDValue NewArgs[8] = {
      N->getOperand(0), // Chain
      SDValue(),
      N->getOperand(2), // ArrayBase
      N->getOperand(3), // Type
      N->getOperand(4), // SWZ_X
      N->getOperand(5), // SWZ_Y
      N->getOperand(6), // SWZ_Z
      N->getOperand(7) // SWZ_W
    };
    SDLoc DL(N);
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
    return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
  }
  case AMDGPUISD::TEXTURE_FETCH: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    // Operands 2..5 hold the coordinate swizzle selects rewritten by
    // OptimizeSwizzle; the rest are copied through unchanged.
    SDValue NewArgs[19] = {
      N->getOperand(0),
      N->getOperand(1),
      N->getOperand(2),
      N->getOperand(3),
      N->getOperand(4),
      N->getOperand(5),
      N->getOperand(6),
      N->getOperand(7),
      N->getOperand(8),
      N->getOperand(9),
      N->getOperand(10),
      N->getOperand(11),
      N->getOperand(12),
      N->getOperand(13),
      N->getOperand(14),
      N->getOperand(15),
      N->getOperand(16),
      N->getOperand(17),
      N->getOperand(18),
    };
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
    return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
        NewArgs);
  }
  }

  return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001916
1917static bool
1918FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001919 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001920 const R600InstrInfo *TII =
1921 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
1922 if (!Src.isMachineOpcode())
1923 return false;
1924 switch (Src.getMachineOpcode()) {
1925 case AMDGPU::FNEG_R600:
1926 if (!Neg.getNode())
1927 return false;
1928 Src = Src.getOperand(0);
1929 Neg = DAG.getTargetConstant(1, MVT::i32);
1930 return true;
1931 case AMDGPU::FABS_R600:
1932 if (!Abs.getNode())
1933 return false;
1934 Src = Src.getOperand(0);
1935 Abs = DAG.getTargetConstant(1, MVT::i32);
1936 return true;
1937 case AMDGPU::CONST_COPY: {
1938 unsigned Opcode = ParentNode->getMachineOpcode();
1939 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1940
1941 if (!Sel.getNode())
1942 return false;
1943
1944 SDValue CstOffset = Src.getOperand(0);
1945 if (ParentNode->getValueType(0).isVector())
1946 return false;
1947
1948 // Gather constants values
1949 int SrcIndices[] = {
1950 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
1951 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
1952 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
1953 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1954 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1955 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1956 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
1957 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
1958 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
1959 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
1960 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
1961 };
1962 std::vector<unsigned> Consts;
Matt Arsenault4d64f962014-05-12 19:23:21 +00001963 for (int OtherSrcIdx : SrcIndices) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001964 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
1965 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
1966 continue;
1967 if (HasDst) {
1968 OtherSrcIdx--;
1969 OtherSelIdx--;
1970 }
1971 if (RegisterSDNode *Reg =
1972 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
1973 if (Reg->getReg() == AMDGPU::ALU_CONST) {
Matt Arsenaultb3ee3882014-05-12 19:26:38 +00001974 ConstantSDNode *Cst
1975 = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001976 Consts.push_back(Cst->getZExtValue());
1977 }
1978 }
1979 }
1980
Matt Arsenault37c12d72014-05-12 20:42:57 +00001981 ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001982 Consts.push_back(Cst->getZExtValue());
1983 if (!TII->fitsConstReadLimitations(Consts)) {
1984 return false;
1985 }
1986
1987 Sel = CstOffset;
1988 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
1989 return true;
1990 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001991 case AMDGPU::MOV_IMM_I32:
1992 case AMDGPU::MOV_IMM_F32: {
1993 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
1994 uint64_t ImmValue = 0;
1995
1996
1997 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
1998 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
1999 float FloatValue = FPC->getValueAPF().convertToFloat();
2000 if (FloatValue == 0.0) {
2001 ImmReg = AMDGPU::ZERO;
2002 } else if (FloatValue == 0.5) {
2003 ImmReg = AMDGPU::HALF;
2004 } else if (FloatValue == 1.0) {
2005 ImmReg = AMDGPU::ONE;
2006 } else {
2007 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2008 }
2009 } else {
2010 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
2011 uint64_t Value = C->getZExtValue();
2012 if (Value == 0) {
2013 ImmReg = AMDGPU::ZERO;
2014 } else if (Value == 1) {
2015 ImmReg = AMDGPU::ONE_INT;
2016 } else {
2017 ImmValue = Value;
2018 }
2019 }
2020
2021 // Check that we aren't already using an immediate.
2022 // XXX: It's possible for an instruction to have more than one
2023 // immediate operand, but this is not supported yet.
2024 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
2025 if (!Imm.getNode())
2026 return false;
2027 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
2028 assert(C);
2029 if (C->getZExtValue())
2030 return false;
2031 Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
2032 }
2033 Src = DAG.getRegister(ImmReg, MVT::i32);
2034 return true;
2035 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002036 default:
2037 return false;
2038 }
2039}
2040
2041
/// \brief Fold the instructions after selecting them
///
/// Post-instruction-selection peephole: for each machine node, try to fold
/// the producers of its source operands (neg/abs modifiers, constant copies,
/// immediates) directly into the node's operand fields via FoldOperand.
/// DOT_4, REG_SEQUENCE and CLAMP_R600 get dedicated handling; everything
/// else goes through the generic three-source path guarded by
/// hasInstrModifiers. Returns a new machine node if anything was folded,
/// otherwise \p Node unchanged.
SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
                                            SelectionDAG &DAG) const {
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
  if (!Node->isMachineOpcode())
    return Node;
  unsigned Opcode = Node->getMachineOpcode();
  SDValue FakeOp; // Null placeholder passed for operand slots a node lacks.

  // Mutable copy of the operand list; FoldOperand edits entries in place.
  std::vector<SDValue> Ops;
  for(SDNode::op_iterator I = Node->op_begin(), E = Node->op_end();
              I != E; ++I)
    Ops.push_back(*I);

  if (Opcode == AMDGPU::DOT_4) {
    // DOT_4 has eight scalar sources (src0/src1 across X,Y,Z,W), each with
    // its own neg/abs modifier operand.
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
      };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
    };
    for (unsigned i = 0; i < 8; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      // The "- 1" converts instruction operand indices to SDNode operand
      // indices (the SDNode operand list has no dst operand).
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue &Abs = Ops[AbsIdx[i] - 1];
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      if (HasDst)
        SelIdx--;
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      // Rebuild the node after the first successful fold; the remaining
      // operands will be revisited when this new node is processed.
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::REG_SEQUENCE) {
    // REG_SEQUENCE operands alternate (value, subreg-index) starting at 1.
    for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
      SDValue &Src = Ops[i];
      if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::CLAMP_R600) {
    // Fold the clamp into the producing instruction's clamp modifier bit
    // and drop the CLAMP_R600 node entirely.
    SDValue Src = Node->getOperand(0);
    if (!Src.isMachineOpcode() ||
        !TII->hasInstrModifiers(Src.getMachineOpcode()))
      return Node;
    int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
        AMDGPU::OpName::clamp);
    if (ClampIdx < 0)
      return Node;
    // NOTE(review): this inner Ops shadows the outer Ops vector above;
    // intentional here (it rebuilds Src's operands, not Node's) but easy
    // to misread.
    std::vector<SDValue> Ops;
    unsigned NumOp = Src.getNumOperands();
    for(unsigned i = 0; i < NumOp; ++i)
          Ops.push_back(Src.getOperand(i));
    Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
    return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
        Node->getVTList(), Ops);
  } else {
    // Generic ALU instruction with up to three sources, each with neg
    // (and, except src2, abs) modifier operands.
    if (!TII->hasInstrModifiers(Opcode))
      return Node;
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
      -1
    };
    for (unsigned i = 0; i < 3; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue FakeAbs;
      SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
      if (HasDst) {
        SelIdx--;
        ImmIdx--;
      }
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      SDValue &Imm = Ops[ImmIdx];
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  }

  return Node;
}