blob: 222c3d72cabac91f8167f57cde11dfd4c72297d9 [file] [log] [blame]
Tom Stellard75aadc22012-12-11 21:25:42 +00001//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Custom DAG lowering for R600
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000016#include "AMDILIntrinsicInfo.h"
17#include "AMDGPUFrameLowering.h"
18#include "AMDGPUSubtarget.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000019#include "R600Defines.h"
20#include "R600InstrInfo.h"
21#include "R600MachineFunctionInfo.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000022#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000023#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000024#include "llvm/CodeGen/MachineInstrBuilder.h"
25#include "llvm/CodeGen/MachineRegisterInfo.h"
26#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000027#include "llvm/IR/Argument.h"
28#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000029
30using namespace llvm;
31
32R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000033 AMDGPUTargetLowering(TM),
34 Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000035 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
36 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
37 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
38 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000039 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
40 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
41
Tom Stellard75aadc22012-12-11 21:25:42 +000042 computeRegisterProperties();
43
Tom Stellard0351ea22013-09-28 02:50:50 +000044 // Set condition code actions
45 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
46 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000047 setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000048 setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000049 setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
50 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000051 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
52 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
53 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
54 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000055 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
56 setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
57
58 setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
59 setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
60 setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
61 setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
62
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000063 setOperationAction(ISD::FCOS, MVT::f32, Custom);
64 setOperationAction(ISD::FSIN, MVT::f32, Custom);
65
Tom Stellard75aadc22012-12-11 21:25:42 +000066 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000067 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000068
Tom Stellard492ebea2013-03-08 15:37:07 +000069 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
70 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000071
72 setOperationAction(ISD::FSUB, MVT::f32, Expand);
73
74 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
75 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
76 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000077
Tom Stellard75aadc22012-12-11 21:25:42 +000078 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
79 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
80
Tom Stellarde8f9f282013-03-08 15:37:05 +000081 setOperationAction(ISD::SETCC, MVT::i32, Expand);
82 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000083 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
84
Tom Stellard53f2f902013-09-05 18:38:03 +000085 setOperationAction(ISD::SELECT, MVT::i32, Expand);
86 setOperationAction(ISD::SELECT, MVT::f32, Expand);
87 setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
Tom Stellard53f2f902013-09-05 18:38:03 +000088 setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000089
Matt Arsenault4e466652014-04-16 01:41:30 +000090 // Expand sign extension of vectors
91 if (!Subtarget->hasBFE())
92 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
93
94 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
95 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);
96
97 if (!Subtarget->hasBFE())
98 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
99 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
100 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);
101
102 if (!Subtarget->hasBFE())
103 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
104 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
105 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);
106
107 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
108 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
109 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);
110
111 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
112
113
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000114 // Legalize loads and stores to the private address space.
115 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000116 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000117 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000118
119 // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
120 // spaces, so it is custom lowered to handle those where it isn't.
Tom Stellard1e803092013-07-23 01:48:18 +0000121 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
122 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
123 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
124 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000125 setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
126 setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
127
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000128 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000129 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000130 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000131 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
Tom Stellardd3ee8c12013-08-16 01:12:06 +0000132 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
133 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000134
Tom Stellard365366f2013-01-23 02:09:06 +0000135 setOperationAction(ISD::LOAD, MVT::i32, Custom);
136 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000137 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
138
Tom Stellard880a80a2014-06-17 16:53:14 +0000139 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
140 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
141 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
142 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
143
144 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
145 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
146 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
147 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
148
Tom Stellard75aadc22012-12-11 21:25:42 +0000149 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +0000150 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +0000151 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +0000152 setTargetDAGCombine(ISD::SELECT_CC);
Quentin Colombete2e05482013-07-30 00:27:16 +0000153 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000154
Tom Stellard5f337882014-04-29 23:12:43 +0000155 // These should be replaced by UDVIREM, but it does not happen automatically
156 // during Type Legalization
157 setOperationAction(ISD::UDIV, MVT::i64, Custom);
158 setOperationAction(ISD::UREM, MVT::i64, Custom);
Jan Vesely343cd6f02014-06-22 21:43:01 +0000159 setOperationAction(ISD::SDIV, MVT::i64, Custom);
160 setOperationAction(ISD::SREM, MVT::i64, Custom);
Tom Stellard5f337882014-04-29 23:12:43 +0000161
Jan Vesely25f36272014-06-18 12:27:13 +0000162 // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
163 // to be Legal/Custom in order to avoid library calls.
164 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
Jan Vesely900ff2e2014-06-18 12:27:15 +0000165 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
Jan Veselyecf51332014-06-18 12:27:17 +0000166 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
Jan Vesely25f36272014-06-18 12:27:13 +0000167
Michel Danzer49812b52013-07-10 16:37:07 +0000168 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
169
Tom Stellardb852af52013-03-08 15:37:03 +0000170 setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard87047f62013-04-24 23:56:18 +0000171 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
Tom Stellardfc455472013-08-12 22:33:21 +0000172 setSchedulingPreference(Sched::Source);
Tom Stellard75aadc22012-12-11 21:25:42 +0000173}
174
/// \brief Expand R600 pseudo instructions that were marked
/// usesCustomInserter into real machine instructions.
///
/// Each case builds the replacement instruction(s) at the insertion point I
/// and falls through to the final MI->eraseFromParent(), which deletes the
/// pseudo.  Cases that `return BB` directly keep the original instruction
/// in place (or have already rewritten it in-place, as RETURN does).
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // Replace LDS_*_RET instruction that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI->getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // If the destination register is actually used, the _RET form must be
      // kept: leave the instruction untouched.
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()))
        return BB;

      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      // Copy every operand except the (dead) destination at index 0.
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  // CLAMP/FABS/FNEG pseudos become a plain MOV carrying the corresponding
  // instruction flag (clamp / absolute value / negate).
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    // Set the MASK flag on the instruction that defines the masked register;
    // the MASK_WRITE pseudo itself is then erased.
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    // The float immediate is materialized via its raw bit pattern.
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    // Read from the constant file: a MOV from ALU_CONST whose src0_sel
    // immediate selects the constant slot.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
                       MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    // EOP is set when the very next instruction is the function's RETURN.
    unsigned EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    // Texture sample with derivatives: emit SET_GRADIENTS_H/V into two fresh
    // 128-bit temporaries, then the TEX_SAMPLE_G that implicitly uses them.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    // Adjust the source swizzle (Src*) and coordinate-type bits (CT*) per
    // texture target; e.g. shadow targets move the compare value into W.
    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
        .addOperand(MI->getOperand(3))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
        .addOperand(MI->getOperand(2))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW)
        // Implicit uses keep the gradient writes alive and ordered before
        // the sample.
        .addReg(T0, RegState::Implicit)
        .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    // Same expansion as TXD, but the final sample is the shadow-compare
    // variant TEX_SAMPLE_C_G.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
        .addOperand(MI->getOperand(3))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
        .addOperand(MI->getOperand(2))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW)
        .addReg(T0, RegState::Implicit)
        .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    // Unconditional branch lowers to a plain JUMP.
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
        .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    // Conditional branch: PRED_X evaluates the f32 condition into
    // PREDICATE_BIT (with the PUSH flag), then JUMP_COND consumes it.
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .addOperand(MI->getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    // Same as the f32 case, using the integer is-not-zero compare.
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO_INT)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .addOperand(MI->getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if its not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    // Scan forward for another export of the same type in this block.
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // CF instruction encoding differs between Evergreen (84) and R600 (40).
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addOperand(MI->getOperand(2))
        .addOperand(MI->getOperand(3))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5))
        .addOperand(MI->getOperand(6))
        .addImm(CfInst)
        .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    // RETURN is modified in place, not replaced: do not erase it.
    return BB;
  }
  }

  // The pseudo has been expanded above; remove it.
  MI->eraseFromParent();
  return BB;
}
551
552//===----------------------------------------------------------------------===//
553// Custom DAG Lowering Operations
554//===----------------------------------------------------------------------===//
555
Tom Stellard75aadc22012-12-11 21:25:42 +0000556SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000557 MachineFunction &MF = DAG.getMachineFunction();
558 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard75aadc22012-12-11 21:25:42 +0000559 switch (Op.getOpcode()) {
560 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Tom Stellard880a80a2014-06-17 16:53:14 +0000561 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
562 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
Jan Vesely25f36272014-06-18 12:27:13 +0000563 case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
Jan Veselyecf51332014-06-18 12:27:17 +0000564 case ISD::SRA_PARTS:
Jan Vesely900ff2e2014-06-18 12:27:15 +0000565 case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000566 case ISD::FCOS:
567 case ISD::FSIN: return LowerTrig(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000568 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000569 case ISD::STORE: return LowerSTORE(Op, DAG);
Tom Stellard365366f2013-01-23 02:09:06 +0000570 case ISD::LOAD: return LowerLOAD(Op, DAG);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000571 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000572 case ISD::INTRINSIC_VOID: {
573 SDValue Chain = Op.getOperand(0);
574 unsigned IntrinsicID =
575 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
576 switch (IntrinsicID) {
577 case AMDGPUIntrinsic::AMDGPU_store_output: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000578 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
579 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000580 MFI->LiveOuts.push_back(Reg);
Andrew Trickef9de2a2013-05-25 02:42:55 +0000581 return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
Tom Stellard75aadc22012-12-11 21:25:42 +0000582 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000583 case AMDGPUIntrinsic::R600_store_swizzle: {
584 const SDValue Args[8] = {
585 Chain,
586 Op.getOperand(2), // Export Value
587 Op.getOperand(3), // ArrayBase
588 Op.getOperand(4), // Type
589 DAG.getConstant(0, MVT::i32), // SWZ_X
590 DAG.getConstant(1, MVT::i32), // SWZ_Y
591 DAG.getConstant(2, MVT::i32), // SWZ_Z
592 DAG.getConstant(3, MVT::i32) // SWZ_W
593 };
Craig Topper48d114b2014-04-26 18:35:24 +0000594 return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(), Args);
Tom Stellard75aadc22012-12-11 21:25:42 +0000595 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000596
Tom Stellard75aadc22012-12-11 21:25:42 +0000597 // default for switch(IntrinsicID)
598 default: break;
599 }
600 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
601 break;
602 }
603 case ISD::INTRINSIC_WO_CHAIN: {
604 unsigned IntrinsicID =
605 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
606 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000607 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000608 switch(IntrinsicID) {
609 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000610 case AMDGPUIntrinsic::R600_load_input: {
611 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
612 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
613 MachineFunction &MF = DAG.getMachineFunction();
614 MachineRegisterInfo &MRI = MF.getRegInfo();
615 MRI.addLiveIn(Reg);
616 return DAG.getCopyFromReg(DAG.getEntryNode(),
617 SDLoc(DAG.getEntryNode()), Reg, VT);
618 }
619
620 case AMDGPUIntrinsic::R600_interp_input: {
621 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
622 int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
623 MachineSDNode *interp;
624 if (ijb < 0) {
625 const MachineFunction &MF = DAG.getMachineFunction();
626 const R600InstrInfo *TII =
627 static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
628 interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
629 MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
630 return DAG.getTargetExtractSubreg(
631 TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
632 DL, MVT::f32, SDValue(interp, 0));
633 }
634 MachineFunction &MF = DAG.getMachineFunction();
635 MachineRegisterInfo &MRI = MF.getRegInfo();
636 unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
637 unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
638 MRI.addLiveIn(RegisterI);
639 MRI.addLiveIn(RegisterJ);
640 SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
641 SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
642 SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
643 SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);
644
645 if (slot % 4 < 2)
646 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
647 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
648 RegisterJNode, RegisterINode);
649 else
650 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
651 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
652 RegisterJNode, RegisterINode);
653 return SDValue(interp, slot % 2);
654 }
Vincent Lejeunef143af32013-11-11 22:10:24 +0000655 case AMDGPUIntrinsic::R600_interp_xy:
656 case AMDGPUIntrinsic::R600_interp_zw: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000657 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Tom Stellard41afe6a2013-02-05 17:09:14 +0000658 MachineSDNode *interp;
Vincent Lejeunef143af32013-11-11 22:10:24 +0000659 SDValue RegisterINode = Op.getOperand(2);
660 SDValue RegisterJNode = Op.getOperand(3);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000661
Vincent Lejeunef143af32013-11-11 22:10:24 +0000662 if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
Tom Stellard41afe6a2013-02-05 17:09:14 +0000663 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
Vincent Lejeunef143af32013-11-11 22:10:24 +0000664 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000665 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000666 else
667 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
Vincent Lejeunef143af32013-11-11 22:10:24 +0000668 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000669 RegisterJNode, RegisterINode);
Vincent Lejeunef143af32013-11-11 22:10:24 +0000670 return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
671 SDValue(interp, 0), SDValue(interp, 1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000672 }
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000673 case AMDGPUIntrinsic::R600_tex:
674 case AMDGPUIntrinsic::R600_texc:
675 case AMDGPUIntrinsic::R600_txl:
676 case AMDGPUIntrinsic::R600_txlc:
677 case AMDGPUIntrinsic::R600_txb:
678 case AMDGPUIntrinsic::R600_txbc:
679 case AMDGPUIntrinsic::R600_txf:
680 case AMDGPUIntrinsic::R600_txq:
681 case AMDGPUIntrinsic::R600_ddx:
Vincent Lejeune6df39432013-10-02 16:00:33 +0000682 case AMDGPUIntrinsic::R600_ddy:
683 case AMDGPUIntrinsic::R600_ldptr: {
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000684 unsigned TextureOp;
685 switch (IntrinsicID) {
686 case AMDGPUIntrinsic::R600_tex:
687 TextureOp = 0;
688 break;
689 case AMDGPUIntrinsic::R600_texc:
690 TextureOp = 1;
691 break;
692 case AMDGPUIntrinsic::R600_txl:
693 TextureOp = 2;
694 break;
695 case AMDGPUIntrinsic::R600_txlc:
696 TextureOp = 3;
697 break;
698 case AMDGPUIntrinsic::R600_txb:
699 TextureOp = 4;
700 break;
701 case AMDGPUIntrinsic::R600_txbc:
702 TextureOp = 5;
703 break;
704 case AMDGPUIntrinsic::R600_txf:
705 TextureOp = 6;
706 break;
707 case AMDGPUIntrinsic::R600_txq:
708 TextureOp = 7;
709 break;
710 case AMDGPUIntrinsic::R600_ddx:
711 TextureOp = 8;
712 break;
713 case AMDGPUIntrinsic::R600_ddy:
714 TextureOp = 9;
715 break;
Vincent Lejeune6df39432013-10-02 16:00:33 +0000716 case AMDGPUIntrinsic::R600_ldptr:
717 TextureOp = 10;
718 break;
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000719 default:
720 llvm_unreachable("Unknow Texture Operation");
721 }
722
723 SDValue TexArgs[19] = {
724 DAG.getConstant(TextureOp, MVT::i32),
725 Op.getOperand(1),
726 DAG.getConstant(0, MVT::i32),
727 DAG.getConstant(1, MVT::i32),
728 DAG.getConstant(2, MVT::i32),
729 DAG.getConstant(3, MVT::i32),
730 Op.getOperand(2),
731 Op.getOperand(3),
732 Op.getOperand(4),
733 DAG.getConstant(0, MVT::i32),
734 DAG.getConstant(1, MVT::i32),
735 DAG.getConstant(2, MVT::i32),
736 DAG.getConstant(3, MVT::i32),
737 Op.getOperand(5),
738 Op.getOperand(6),
739 Op.getOperand(7),
740 Op.getOperand(8),
741 Op.getOperand(9),
742 Op.getOperand(10)
743 };
Craig Topper48d114b2014-04-26 18:35:24 +0000744 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000745 }
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000746 case AMDGPUIntrinsic::AMDGPU_dp4: {
747 SDValue Args[8] = {
748 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
749 DAG.getConstant(0, MVT::i32)),
750 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
751 DAG.getConstant(0, MVT::i32)),
752 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
753 DAG.getConstant(1, MVT::i32)),
754 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
755 DAG.getConstant(1, MVT::i32)),
756 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
757 DAG.getConstant(2, MVT::i32)),
758 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
759 DAG.getConstant(2, MVT::i32)),
760 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
761 DAG.getConstant(3, MVT::i32)),
762 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
763 DAG.getConstant(3, MVT::i32))
764 };
Craig Topper48d114b2014-04-26 18:35:24 +0000765 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000766 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000767
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000768 case Intrinsic::r600_read_ngroups_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000769 return LowerImplicitParameter(DAG, VT, DL, 0);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000770 case Intrinsic::r600_read_ngroups_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000771 return LowerImplicitParameter(DAG, VT, DL, 1);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000772 case Intrinsic::r600_read_ngroups_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000773 return LowerImplicitParameter(DAG, VT, DL, 2);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000774 case Intrinsic::r600_read_global_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000775 return LowerImplicitParameter(DAG, VT, DL, 3);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000776 case Intrinsic::r600_read_global_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000777 return LowerImplicitParameter(DAG, VT, DL, 4);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000778 case Intrinsic::r600_read_global_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000779 return LowerImplicitParameter(DAG, VT, DL, 5);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000780 case Intrinsic::r600_read_local_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000781 return LowerImplicitParameter(DAG, VT, DL, 6);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000782 case Intrinsic::r600_read_local_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000783 return LowerImplicitParameter(DAG, VT, DL, 7);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000784 case Intrinsic::r600_read_local_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000785 return LowerImplicitParameter(DAG, VT, DL, 8);
786
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000787 case Intrinsic::r600_read_tgid_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000788 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
789 AMDGPU::T1_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000790 case Intrinsic::r600_read_tgid_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000791 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
792 AMDGPU::T1_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000793 case Intrinsic::r600_read_tgid_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000794 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
795 AMDGPU::T1_Z, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000796 case Intrinsic::r600_read_tidig_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000797 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
798 AMDGPU::T0_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000799 case Intrinsic::r600_read_tidig_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000800 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
801 AMDGPU::T0_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000802 case Intrinsic::r600_read_tidig_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000803 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
804 AMDGPU::T0_Z, VT);
805 }
806 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
807 break;
808 }
809 } // end switch(Op.getOpcode())
810 return SDValue();
811}
812
813void R600TargetLowering::ReplaceNodeResults(SDNode *N,
814 SmallVectorImpl<SDValue> &Results,
815 SelectionDAG &DAG) const {
816 switch (N->getOpcode()) {
Matt Arsenaultd125d742014-03-27 17:23:24 +0000817 default:
818 AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
819 return;
Tom Stellard75aadc22012-12-11 21:25:42 +0000820 case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
Tom Stellard365366f2013-01-23 02:09:06 +0000821 return;
822 case ISD::LOAD: {
823 SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
824 Results.push_back(SDValue(Node, 0));
825 Results.push_back(SDValue(Node, 1));
826 // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
827 // function
828 DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
829 return;
830 }
Jan Vesely343cd6f02014-06-22 21:43:01 +0000831 case ISD::STORE: {
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000832 SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
833 Results.push_back(SDValue(Node, 0));
834 return;
Tom Stellard75aadc22012-12-11 21:25:42 +0000835 }
Jan Vesely343cd6f02014-06-22 21:43:01 +0000836 case ISD::UDIV: {
837 SDValue Op = SDValue(N, 0);
838 SDLoc DL(Op);
839 EVT VT = Op.getValueType();
840 SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT),
841 N->getOperand(0), N->getOperand(1));
842 Results.push_back(UDIVREM);
843 break;
844 }
845 case ISD::UREM: {
846 SDValue Op = SDValue(N, 0);
847 SDLoc DL(Op);
848 EVT VT = Op.getValueType();
849 SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT),
850 N->getOperand(0), N->getOperand(1));
851 Results.push_back(UDIVREM.getValue(1));
852 break;
853 }
854 case ISD::SDIV: {
855 SDValue Op = SDValue(N, 0);
856 SDLoc DL(Op);
857 EVT VT = Op.getValueType();
858 SDValue SDIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(VT, VT),
859 N->getOperand(0), N->getOperand(1));
860 Results.push_back(SDIVREM);
861 break;
862 }
863 case ISD::SREM: {
864 SDValue Op = SDValue(N, 0);
865 SDLoc DL(Op);
866 EVT VT = Op.getValueType();
867 SDValue SDIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(VT, VT),
868 N->getOperand(0), N->getOperand(1));
869 Results.push_back(SDIVREM.getValue(1));
870 break;
871 }
872 case ISD::SDIVREM: {
873 SDValue Op = SDValue(N, 1);
874 SDValue RES = LowerSDIVREM(Op, DAG);
875 Results.push_back(RES);
876 Results.push_back(RES.getValue(1));
877 break;
878 }
879 case ISD::UDIVREM: {
880 SDValue Op = SDValue(N, 0);
881 SDLoc DL(Op);
882 EVT VT = Op.getValueType();
883 EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
884
885 SDValue one = DAG.getConstant(1, HalfVT);
886 SDValue zero = DAG.getConstant(0, HalfVT);
887
888 //HiLo split
889 SDValue LHS = N->getOperand(0);
890 SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, zero);
891 SDValue LHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, one);
892
893 SDValue RHS = N->getOperand(1);
894 SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, zero);
895 SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, one);
896
897 // Get Speculative values
898 SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo);
899 SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo);
900
901 SDValue REM_Hi = zero;
902 SDValue REM_Lo = DAG.getSelectCC(DL, RHS_Hi, zero, REM_Part, LHS_Hi, ISD::SETEQ);
903
904 SDValue DIV_Hi = DAG.getSelectCC(DL, RHS_Hi, zero, DIV_Part, zero, ISD::SETEQ);
905 SDValue DIV_Lo = zero;
906
907 const unsigned halfBitWidth = HalfVT.getSizeInBits();
908
909 for (unsigned i = 0; i < halfBitWidth; ++i) {
910 SDValue POS = DAG.getConstant(halfBitWidth - i - 1, HalfVT);
911 // Get Value of high bit
912 SDValue HBit;
913 if (halfBitWidth == 32 && Subtarget->hasBFE()) {
914 HBit = DAG.getNode(AMDGPUISD::BFE_U32, DL, HalfVT, LHS_Lo, POS, one);
915 } else {
916 HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS);
917 HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one);
918 }
919
920 SDValue Carry = DAG.getNode(ISD::SRL, DL, HalfVT, REM_Lo,
921 DAG.getConstant(halfBitWidth - 1, HalfVT));
922 REM_Hi = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Hi, one);
923 REM_Hi = DAG.getNode(ISD::OR, DL, HalfVT, REM_Hi, Carry);
924
925 REM_Lo = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Lo, one);
926 REM_Lo = DAG.getNode(ISD::OR, DL, HalfVT, REM_Lo, HBit);
927
928
929 SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
930
931 SDValue BIT = DAG.getConstant(1 << (halfBitWidth - i - 1), HalfVT);
932 SDValue realBIT = DAG.getSelectCC(DL, REM, RHS, BIT, zero, ISD::SETGE);
933
934 DIV_Lo = DAG.getNode(ISD::OR, DL, HalfVT, DIV_Lo, realBIT);
935
936 // Update REM
937
938 SDValue REM_sub = DAG.getNode(ISD::SUB, DL, VT, REM, RHS);
939
940 REM = DAG.getSelectCC(DL, REM, RHS, REM_sub, REM, ISD::SETGE);
941 REM_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, zero);
942 REM_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, one);
943 }
944
945 SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
946 SDValue DIV = DAG.getNode(ISD::BUILD_PAIR, DL, VT, DIV_Lo, DIV_Hi);
947 Results.push_back(DIV);
948 Results.push_back(REM);
949 break;
950 }
951 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000952}
953
Tom Stellard880a80a2014-06-17 16:53:14 +0000954SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
955 SDValue Vector) const {
956
957 SDLoc DL(Vector);
958 EVT VecVT = Vector.getValueType();
959 EVT EltVT = VecVT.getVectorElementType();
960 SmallVector<SDValue, 8> Args;
961
962 for (unsigned i = 0, e = VecVT.getVectorNumElements();
963 i != e; ++i) {
964 Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
965 Vector, DAG.getConstant(i, getVectorIdxTy())));
966 }
967
968 return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
969}
970
971SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
972 SelectionDAG &DAG) const {
973
974 SDLoc DL(Op);
975 SDValue Vector = Op.getOperand(0);
976 SDValue Index = Op.getOperand(1);
977
978 if (isa<ConstantSDNode>(Index) ||
979 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
980 return Op;
981
982 Vector = vectorToVerticalVector(DAG, Vector);
983 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
984 Vector, Index);
985}
986
987SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
988 SelectionDAG &DAG) const {
989 SDLoc DL(Op);
990 SDValue Vector = Op.getOperand(0);
991 SDValue Value = Op.getOperand(1);
992 SDValue Index = Op.getOperand(2);
993
994 if (isa<ConstantSDNode>(Index) ||
995 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
996 return Op;
997
998 Vector = vectorToVerticalVector(DAG, Vector);
999 SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
1000 Vector, Value, Index);
1001 return vectorToVerticalVector(DAG, Insert);
1002}
1003
Vincent Lejeuneb55940c2013-07-09 15:03:11 +00001004SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
1005 // On hw >= R700, COS/SIN input must be between -1. and 1.
1006 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
1007 EVT VT = Op.getValueType();
1008 SDValue Arg = Op.getOperand(0);
1009 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
1010 DAG.getNode(ISD::FADD, SDLoc(Op), VT,
1011 DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
1012 DAG.getConstantFP(0.15915494309, MVT::f32)),
1013 DAG.getConstantFP(0.5, MVT::f32)));
1014 unsigned TrigNode;
1015 switch (Op.getOpcode()) {
1016 case ISD::FCOS:
1017 TrigNode = AMDGPUISD::COS_HW;
1018 break;
1019 case ISD::FSIN:
1020 TrigNode = AMDGPUISD::SIN_HW;
1021 break;
1022 default:
1023 llvm_unreachable("Wrong trig opcode");
1024 }
1025 SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
1026 DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
1027 DAG.getConstantFP(-0.5, MVT::f32)));
1028 if (Gen >= AMDGPUSubtarget::R700)
1029 return TrigVal;
1030 // On R600 hw, COS/SIN input must be between -Pi and Pi.
1031 return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
1032 DAG.getConstantFP(3.14159265359, MVT::f32));
1033}
1034
Jan Vesely25f36272014-06-18 12:27:13 +00001035SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
1036 SDLoc DL(Op);
1037 EVT VT = Op.getValueType();
1038
1039 SDValue Lo = Op.getOperand(0);
1040 SDValue Hi = Op.getOperand(1);
1041 SDValue Shift = Op.getOperand(2);
1042 SDValue Zero = DAG.getConstant(0, VT);
1043 SDValue One = DAG.getConstant(1, VT);
1044
1045 SDValue Width = DAG.getConstant(VT.getSizeInBits(), VT);
1046 SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT);
1047 SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
1048 SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
1049
1050 // The dance around Width1 is necessary for 0 special case.
1051 // Without it the CompShift might be 32, producing incorrect results in
1052 // Overflow. So we do the shift in two steps, the alternative is to
1053 // add a conditional to filter the special case.
1054
1055 SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
1056 Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);
1057
1058 SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
1059 HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
1060 SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);
1061
1062 SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
1063 SDValue LoBig = Zero;
1064
1065 Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
1066 Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
1067
1068 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
1069}
1070
Jan Vesely900ff2e2014-06-18 12:27:15 +00001071SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
1072 SDLoc DL(Op);
1073 EVT VT = Op.getValueType();
1074
1075 SDValue Lo = Op.getOperand(0);
1076 SDValue Hi = Op.getOperand(1);
1077 SDValue Shift = Op.getOperand(2);
1078 SDValue Zero = DAG.getConstant(0, VT);
1079 SDValue One = DAG.getConstant(1, VT);
1080
Jan Veselyecf51332014-06-18 12:27:17 +00001081 const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;
1082
Jan Vesely900ff2e2014-06-18 12:27:15 +00001083 SDValue Width = DAG.getConstant(VT.getSizeInBits(), VT);
1084 SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT);
1085 SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
1086 SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
1087
1088 // The dance around Width1 is necessary for 0 special case.
1089 // Without it the CompShift might be 32, producing incorrect results in
1090 // Overflow. So we do the shift in two steps, the alternative is to
1091 // add a conditional to filter the special case.
1092
1093 SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
1094 Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);
1095
Jan Veselyecf51332014-06-18 12:27:17 +00001096 SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
Jan Vesely900ff2e2014-06-18 12:27:15 +00001097 SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
1098 LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);
1099
Jan Veselyecf51332014-06-18 12:27:17 +00001100 SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
1101 SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;
Jan Vesely900ff2e2014-06-18 12:27:15 +00001102
1103 Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
1104 Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
1105
1106 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
1107}
1108
Tom Stellard75aadc22012-12-11 21:25:42 +00001109SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
1110 return DAG.getNode(
1111 ISD::SETCC,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001112 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +00001113 MVT::i1,
1114 Op, DAG.getConstantFP(0.0f, MVT::f32),
1115 DAG.getCondCode(ISD::SETNE)
1116 );
1117}
1118
Tom Stellard75aadc22012-12-11 21:25:42 +00001119SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001120 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001121 unsigned DwordOffset) const {
1122 unsigned ByteOffset = DwordOffset * 4;
1123 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +00001124 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +00001125
1126 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
1127 assert(isInt<16>(ByteOffset));
1128
1129 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
1130 DAG.getConstant(ByteOffset, MVT::i32), // PTR
1131 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
1132 false, false, false, 0);
1133}
1134
Tom Stellard75aadc22012-12-11 21:25:42 +00001135bool R600TargetLowering::isZero(SDValue Op) const {
1136 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
1137 return Cst->isNullValue();
1138 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
1139 return CstFP->isZero();
1140 } else {
1141 return false;
1142 }
1143}
1144
/// Custom lowering for SELECT_CC.  First tries to massage the node into a
/// shape a native SET* or CND* instruction can match (by swapping operands
/// and/or inverting the condition code); otherwise expands it into two
/// supported SELECT_CC operations.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.  If the HW
  // true/false values are reversed, invert (or invert+swap) the condition
  // so the select reads (cond ? hw_true : hw_false).
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
     ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types.  This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    // Not-equal style conditions are expressed as the inverted condition
    // with True/False swapped.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far, it means we have no native instructions to
  // handle this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
1279
Alp Tokercb402912014-01-24 17:20:08 +00001280/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001281/// convert these pointers to a register index. Each register holds
1282/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
1283/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
1284/// for indirect addressing.
1285SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1286 unsigned StackWidth,
1287 SelectionDAG &DAG) const {
1288 unsigned SRLPad;
1289 switch(StackWidth) {
1290 case 1:
1291 SRLPad = 2;
1292 break;
1293 case 2:
1294 SRLPad = 3;
1295 break;
1296 case 4:
1297 SRLPad = 4;
1298 break;
1299 default: llvm_unreachable("Invalid stack width");
1300 }
1301
Andrew Trickef9de2a2013-05-25 02:42:55 +00001302 return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001303 DAG.getConstant(SRLPad, MVT::i32));
1304}
1305
1306void R600TargetLowering::getStackAddress(unsigned StackWidth,
1307 unsigned ElemIdx,
1308 unsigned &Channel,
1309 unsigned &PtrIncr) const {
1310 switch (StackWidth) {
1311 default:
1312 case 1:
1313 Channel = 0;
1314 if (ElemIdx > 0) {
1315 PtrIncr = 1;
1316 } else {
1317 PtrIncr = 0;
1318 }
1319 break;
1320 case 2:
1321 Channel = ElemIdx % 2;
1322 if (ElemIdx == 2) {
1323 PtrIncr = 1;
1324 } else {
1325 PtrIncr = 0;
1326 }
1327 break;
1328 case 4:
1329 Channel = ElemIdx;
1330 PtrIncr = 0;
1331 break;
1332 }
1333}
1334
Tom Stellard75aadc22012-12-11 21:25:42 +00001335SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001336 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001337 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1338 SDValue Chain = Op.getOperand(0);
1339 SDValue Value = Op.getOperand(1);
1340 SDValue Ptr = Op.getOperand(2);
1341
Tom Stellard2ffc3302013-08-26 15:05:44 +00001342 SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
Tom Stellardfbab8272013-08-16 01:12:11 +00001343 if (Result.getNode()) {
1344 return Result;
1345 }
1346
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001347 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
1348 if (StoreNode->isTruncatingStore()) {
1349 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001350 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001351 EVT MemVT = StoreNode->getMemoryVT();
1352 SDValue MaskConstant;
1353 if (MemVT == MVT::i8) {
1354 MaskConstant = DAG.getConstant(0xFF, MVT::i32);
1355 } else {
1356 assert(MemVT == MVT::i16);
1357 MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
1358 }
1359 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
1360 DAG.getConstant(2, MVT::i32));
1361 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
1362 DAG.getConstant(0x00000003, VT));
1363 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1364 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
1365 DAG.getConstant(3, VT));
1366 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1367 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
1368 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1369 // vector instead.
1370 SDValue Src[4] = {
1371 ShiftedValue,
1372 DAG.getConstant(0, MVT::i32),
1373 DAG.getConstant(0, MVT::i32),
1374 Mask
1375 };
Craig Topper48d114b2014-04-26 18:35:24 +00001376 SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001377 SDValue Args[3] = { Chain, Input, DWordAddr };
1378 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
Craig Topper206fcd42014-04-26 19:29:41 +00001379 Op->getVTList(), Args, MemVT,
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001380 StoreNode->getMemOperand());
1381 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
1382 Value.getValueType().bitsGE(MVT::i32)) {
1383 // Convert pointer from byte address to dword address.
1384 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1385 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
1386 Ptr, DAG.getConstant(2, MVT::i32)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001387
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001388 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
Matt Arsenaulteaa3a7e2013-12-10 21:37:42 +00001389 llvm_unreachable("Truncated and indexed stores not supported yet");
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001390 } else {
1391 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1392 }
1393 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001394 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001395 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001396
1397 EVT ValueVT = Value.getValueType();
1398
1399 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1400 return SDValue();
1401 }
1402
Tom Stellarde9373602014-01-22 19:24:14 +00001403 SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
1404 if (Ret.getNode()) {
1405 return Ret;
1406 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001407 // Lowering for indirect addressing
1408
1409 const MachineFunction &MF = DAG.getMachineFunction();
1410 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1411 getTargetMachine().getFrameLowering());
1412 unsigned StackWidth = TFL->getStackWidth(MF);
1413
1414 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1415
1416 if (ValueVT.isVector()) {
1417 unsigned NumElemVT = ValueVT.getVectorNumElements();
1418 EVT ElemVT = ValueVT.getVectorElementType();
Craig Topper48d114b2014-04-26 18:35:24 +00001419 SmallVector<SDValue, 4> Stores(NumElemVT);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001420
1421 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1422 "vector width in load");
1423
1424 for (unsigned i = 0; i < NumElemVT; ++i) {
1425 unsigned Channel, PtrIncr;
1426 getStackAddress(StackWidth, i, Channel, PtrIncr);
1427 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1428 DAG.getConstant(PtrIncr, MVT::i32));
1429 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
1430 Value, DAG.getConstant(i, MVT::i32));
1431
1432 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1433 Chain, Elem, Ptr,
1434 DAG.getTargetConstant(Channel, MVT::i32));
1435 }
Craig Topper48d114b2014-04-26 18:35:24 +00001436 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001437 } else {
1438 if (ValueVT == MVT::i8) {
1439 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1440 }
1441 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
NAKAMURA Takumi18ca09c2013-05-22 06:37:25 +00001442 DAG.getTargetConstant(0, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001443 }
1444
1445 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001446}
1447
Tom Stellard365366f2013-01-23 02:09:06 +00001448// return (512 + (kc_bank << 12)
1449static int
1450ConstantAddressBlock(unsigned AddressSpace) {
1451 switch (AddressSpace) {
1452 case AMDGPUAS::CONSTANT_BUFFER_0:
1453 return 512;
1454 case AMDGPUAS::CONSTANT_BUFFER_1:
1455 return 512 + 4096;
1456 case AMDGPUAS::CONSTANT_BUFFER_2:
1457 return 512 + 4096 * 2;
1458 case AMDGPUAS::CONSTANT_BUFFER_3:
1459 return 512 + 4096 * 3;
1460 case AMDGPUAS::CONSTANT_BUFFER_4:
1461 return 512 + 4096 * 4;
1462 case AMDGPUAS::CONSTANT_BUFFER_5:
1463 return 512 + 4096 * 5;
1464 case AMDGPUAS::CONSTANT_BUFFER_6:
1465 return 512 + 4096 * 6;
1466 case AMDGPUAS::CONSTANT_BUFFER_7:
1467 return 512 + 4096 * 7;
1468 case AMDGPUAS::CONSTANT_BUFFER_8:
1469 return 512 + 4096 * 8;
1470 case AMDGPUAS::CONSTANT_BUFFER_9:
1471 return 512 + 4096 * 9;
1472 case AMDGPUAS::CONSTANT_BUFFER_10:
1473 return 512 + 4096 * 10;
1474 case AMDGPUAS::CONSTANT_BUFFER_11:
1475 return 512 + 4096 * 11;
1476 case AMDGPUAS::CONSTANT_BUFFER_12:
1477 return 512 + 4096 * 12;
1478 case AMDGPUAS::CONSTANT_BUFFER_13:
1479 return 512 + 4096 * 13;
1480 case AMDGPUAS::CONSTANT_BUFFER_14:
1481 return 512 + 4096 * 14;
1482 case AMDGPUAS::CONSTANT_BUFFER_15:
1483 return 512 + 4096 * 15;
1484 default:
1485 return -1;
1486 }
1487}
1488
/// Custom lowering for ISD::LOAD on R600.  Handles, in order: loads the
/// generic AMDGPU lowering can take, vector loads from local memory,
/// constant-buffer loads, manual expansion of sext loads, and finally
/// private-address (indirect/stack) loads.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  // Give the shared AMDGPU lowering the first shot; if it produced a value,
  // pair it with the original chain and return.
  SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
  if (Ret.getNode()) {
    SDValue Ops[2] = {
      Ret,
      Chain
    };
    return DAG.getMergeValues(Ops, DL);
  }


  // Vector loads from local memory are split into per-element loads.
  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      SplitVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Constant-buffer loads (non-extending or zero-extending only; sext is
  // handled by the expansion further down).
  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
        isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      // Scalar results still go through a BUILD_VECTOR; the scalar value is
      // re-extracted below.
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT,
                           makeArrayRef(Slots, NumElements));
    } else {
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    SDValue ShiftAmount =
        DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
    // Expand the sextload as a plain extload followed by shl/sra to
    // sign-extend the value in-register.
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                  LoadNode->getPointerInfo(), MemVT,
                                  LoadNode->isVolatile(),
                                  LoadNode->isNonTemporal(),
                                  LoadNode->getAlignment());
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);

    SDValue MergedValues[2] = { Sra, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Everything below is the private (stack) address space.
  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    // One REGISTER_LOAD per element; channel/pointer increment depend on the
    // configured stack width.  Unused lanes are filled with undef so the
    // result is always a 4-wide vector.
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, MVT::i32),
                             Op.getOperand(2));
    }
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2] = {
    LoweredLoad,
    Chain
  };

  return DAG.getMergeValues(Ops, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001637
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      SDLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned ShaderType = MF.getInfo<R600MachineFunctionInfo>()->ShaderType;

  SmallVector<ISD::InputArg, 8> LocalIns;

  // Recover the pre-legalization argument types so the extloads below use the
  // true in-memory type (MemVT) rather than the legalized register type.
  getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);

  AnalyzeFormalArguments(CCInfo, LocalIns);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    EVT VT = Ins[i].VT;
    EVT MemVT = LocalIns[i].VT;

    // Graphics shaders receive their inputs in registers; only compute
    // kernels read arguments from the constant buffer.
    if (ShaderType != ShaderType::COMPUTE) {
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
    // being invalid. Somehow this seems to work with i64 arguments, but breaks
    // for <1 x i64>.

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.

    // FIXME: This should really check the extload type, but the handling of
    // extload vector parameters seems to be broken.
    //ISD::LoadExtType Ext = Ins[i].Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
    ISD::LoadExtType Ext = ISD::SEXTLOAD;
    SDValue Arg = DAG.getExtLoad(Ext, DL, VT, Chain,
                                 DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
                                 MachinePointerInfo(UndefValue::get(PtrTy)),
                                 MemVT, false, false, 4);

    // 4 is the preferred alignment for the CONSTANT memory space.
    InVals.push_back(Arg);
  }
  return Chain;
}
1697
Matt Arsenault758659232013-05-18 00:21:46 +00001698EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001699 if (!VT.isVector())
1700 return MVT::i32;
Tom Stellard75aadc22012-12-11 21:25:42 +00001701 return VT.changeVectorElementTypeToInteger();
1702}
1703
/// Canonicalize a 4-element BUILD_VECTOR so it can be encoded compactly with
/// hardware swizzle selects: undef lanes become mask-writes, constant 0.0/1.0
/// lanes become the SEL_0/SEL_1 selects, and a lane duplicating an earlier
/// one is redirected at the first occurrence.  \p RemapSwizzle receives the
/// resulting old-lane -> select map (empty entries mean "unchanged").
static SDValue CompactSwizzlableVector(
  SelectionDAG &DAG, SDValue VectorEntry,
  DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      // We mask write here to teach later passes that the ith element of this
      // vector is undef. Thus we can use it to reduce 128 bits reg usage,
      // break false dependencies and additionally make assembly easier to read.
      RemapSwizzle[i] = 7; // SEL_MASK_WRITE
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      // Constant 0.0 / 1.0 lanes can be produced by the hardware selects,
      // so drop the operand entirely.
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      continue;
    // Fold a lane that repeats an earlier lane: point its swizzle at the
    // first occurrence and mark this lane undef.
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec);
}
1746
/// Try to move one EXTRACT_VECTOR_ELT operand of \p VectorEntry into the lane
/// matching its source index so later passes can use an identity swizzle.
/// Lanes that already extract their own index are pinned in place.
/// \p RemapSwizzle receives the old-lane -> new-lane permutation.
static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
                                DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };
  bool isUnmovable[4] = { false, false, false, false };
  // First pass: initialize the identity map and pin lanes whose extract index
  // already matches their position.
  for (unsigned i = 0; i < 4; i++) {
    RemapSwizzle[i] = i;
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      // NOTE(review): the extract index is dereferenced through an unchecked
      // dyn_cast — this assumes the index operand is always a constant here;
      // confirm, or this is UB for a variable index.
      unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
          ->getZExtValue();
      if (i == Idx)
        isUnmovable[Idx] = true;
    }
  }

  // Second pass: swap at most one movable extract into its home lane.
  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
          ->getZExtValue();
      if (isUnmovable[Idx])
        continue;
      // Swap i and Idx
      std::swap(NewBldVec[Idx], NewBldVec[i]);
      std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
      break;
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec);
}
1784
1785
1786SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1787SDValue Swz[4], SelectionDAG &DAG) const {
1788 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1789 // Old -> New swizzle values
1790 DenseMap<unsigned, unsigned> SwizzleRemap;
1791
1792 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1793 for (unsigned i = 0; i < 4; i++) {
1794 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1795 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1796 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1797 }
1798
1799 SwizzleRemap.clear();
1800 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1801 for (unsigned i = 0; i < 4; i++) {
1802 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1803 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1804 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1805 }
1806
1807 return BuildVector;
1808}
1809
1810
Tom Stellard75aadc22012-12-11 21:25:42 +00001811//===----------------------------------------------------------------------===//
1812// Custom DAG Optimizations
1813//===----------------------------------------------------------------------===//
1814
/// R600-specific DAG combines; anything not matched here is forwarded to the
/// shared AMDGPU combiner.
SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
  case ISD::FP_ROUND: {
      SDValue Arg = N->getOperand(0);
      if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
        return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
                           Arg.getOperand(0));
      }
      break;
    }

  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
  // (i32 select_cc f32, f32, -1, 0 cc)
  //
  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
  // this to one of the SET*_DX10 instructions.
  case ISD::FP_TO_SINT: {
    SDValue FNeg = N->getOperand(0);
    if (FNeg.getOpcode() != ISD::FNEG) {
      return SDValue();
    }
    SDValue SelectCC = FNeg.getOperand(0);
    if (SelectCC.getOpcode() != ISD::SELECT_CC ||
        SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
        SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
        !isHWTrueValue(SelectCC.getOperand(2)) ||
        !isHWFalseValue(SelectCC.getOperand(3))) {
      return SDValue();
    }

    return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
                           SelectCC.getOperand(0), // LHS
                           SelectCC.getOperand(1), // RHS
                           DAG.getConstant(-1, MVT::i32), // True
                           DAG.getConstant(0, MVT::i32), // False
                           SelectCC.getOperand(4)); // CC

    break;
  }

  // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
  // => build_vector elt0, ... , NewEltIdx, ... , eltN
  case ISD::INSERT_VECTOR_ELT: {
    SDValue InVec = N->getOperand(0);
    SDValue InVal = N->getOperand(1);
    SDValue EltNo = N->getOperand(2);
    SDLoc dl(N);

    // If the inserted element is an UNDEF, just use the input vector.
    if (InVal.getOpcode() == ISD::UNDEF)
      return InVec;

    EVT VT = InVec.getValueType();

    // If we can't generate a legal BUILD_VECTOR, exit
    if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
      return SDValue();

    // Check that we know which element is being inserted
    if (!isa<ConstantSDNode>(EltNo))
      return SDValue();
    unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

    // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
    // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
    // vector elements.
    SmallVector<SDValue, 8> Ops;
    if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
      Ops.append(InVec.getNode()->op_begin(),
                 InVec.getNode()->op_end());
    } else if (InVec.getOpcode() == ISD::UNDEF) {
      unsigned NElts = VT.getVectorNumElements();
      Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
    } else {
      return SDValue();
    }

    // Insert the element
    if (Elt < Ops.size()) {
      // All the operands of BUILD_VECTOR must have the same type;
      // we enforce that here.
      EVT OpVT = Ops[0].getValueType();
      if (InVal.getValueType() != OpVT)
        InVal = OpVT.bitsGT(InVal.getValueType()) ?
          DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
          DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
      Ops[Elt] = InVal;
    }

    // Return the new vector
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
  }

  // Extract_vec (Build_vector) generated by custom lowering
  // also needs to be customly combined
  case ISD::EXTRACT_VECTOR_ELT: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return Arg->getOperand(Element);
      }
    }
    if (Arg.getOpcode() == ISD::BITCAST &&
        Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
            Arg->getOperand(0).getOperand(Element));
      }
    }
  }
  // NOTE(review): there is no break above — an EXTRACT_VECTOR_ELT that is not
  // combined falls through into the SELECT_CC case below, whose guards then
  // return SDValue() (skipping the parent-combine call at the bottom of this
  // function). Confirm the fallthrough is intentional.

  case ISD::SELECT_CC: {
    // Try common optimizations
    SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
    if (Ret.getNode())
      return Ret;

    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
    // selectcc x, y, a, b, inv(cc)
    //
    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
    // selectcc x, y, a, b, cc
    SDValue LHS = N->getOperand(0);
    if (LHS.getOpcode() != ISD::SELECT_CC) {
      return SDValue();
    }

    SDValue RHS = N->getOperand(1);
    SDValue True = N->getOperand(2);
    SDValue False = N->getOperand(3);
    ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();

    // The inner select must produce exactly this node's (True, False) pair,
    // and the compared RHS must be the False value.
    if (LHS.getOperand(2).getNode() != True.getNode() ||
        LHS.getOperand(3).getNode() != False.getNode() ||
        RHS.getNode() != False.getNode()) {
      return SDValue();
    }

    switch (NCC) {
    default: return SDValue();
    case ISD::SETNE: return LHS;
    case ISD::SETEQ: {
      ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
      LHSCC = ISD::getSetCCInverse(LHSCC,
                                  LHS.getOperand(0).getValueType().isInteger());
      // Only rebuild with the inverted condition if it is legal (or we are
      // still before operation legalization).
      if (DCI.isBeforeLegalizeOps() ||
          isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
        return DAG.getSelectCC(SDLoc(N),
                               LHS.getOperand(0),
                               LHS.getOperand(1),
                               LHS.getOperand(2),
                               LHS.getOperand(3),
                               LHSCC);
      break;
    }
    }
    return SDValue();
  }

  case AMDGPUISD::EXPORT: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    // Canonicalize the exported vector and its four swizzle operands.
    SDValue NewArgs[8] = {
      N->getOperand(0), // Chain
      SDValue(),
      N->getOperand(2), // ArrayBase
      N->getOperand(3), // Type
      N->getOperand(4), // SWZ_X
      N->getOperand(5), // SWZ_Y
      N->getOperand(6), // SWZ_Z
      N->getOperand(7) // SWZ_W
    };
    SDLoc DL(N);
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
    return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
  }
  case AMDGPUISD::TEXTURE_FETCH: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    // Same swizzle optimization for texture fetches; operands 2..5 are the
    // swizzles rewritten by OptimizeSwizzle.
    SDValue NewArgs[19] = {
      N->getOperand(0),
      N->getOperand(1),
      N->getOperand(2),
      N->getOperand(3),
      N->getOperand(4),
      N->getOperand(5),
      N->getOperand(6),
      N->getOperand(7),
      N->getOperand(8),
      N->getOperand(9),
      N->getOperand(10),
      N->getOperand(11),
      N->getOperand(12),
      N->getOperand(13),
      N->getOperand(14),
      N->getOperand(15),
      N->getOperand(16),
      N->getOperand(17),
      N->getOperand(18),
    };
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
    return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
        NewArgs);
  }
  }

  return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002034
2035static bool
2036FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002037 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002038 const R600InstrInfo *TII =
2039 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
2040 if (!Src.isMachineOpcode())
2041 return false;
2042 switch (Src.getMachineOpcode()) {
2043 case AMDGPU::FNEG_R600:
2044 if (!Neg.getNode())
2045 return false;
2046 Src = Src.getOperand(0);
2047 Neg = DAG.getTargetConstant(1, MVT::i32);
2048 return true;
2049 case AMDGPU::FABS_R600:
2050 if (!Abs.getNode())
2051 return false;
2052 Src = Src.getOperand(0);
2053 Abs = DAG.getTargetConstant(1, MVT::i32);
2054 return true;
2055 case AMDGPU::CONST_COPY: {
2056 unsigned Opcode = ParentNode->getMachineOpcode();
2057 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2058
2059 if (!Sel.getNode())
2060 return false;
2061
2062 SDValue CstOffset = Src.getOperand(0);
2063 if (ParentNode->getValueType(0).isVector())
2064 return false;
2065
2066 // Gather constants values
2067 int SrcIndices[] = {
2068 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2069 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2070 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
2071 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2072 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2073 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2074 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2075 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2076 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2077 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2078 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
2079 };
2080 std::vector<unsigned> Consts;
Matt Arsenault4d64f962014-05-12 19:23:21 +00002081 for (int OtherSrcIdx : SrcIndices) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002082 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2083 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2084 continue;
2085 if (HasDst) {
2086 OtherSrcIdx--;
2087 OtherSelIdx--;
2088 }
2089 if (RegisterSDNode *Reg =
2090 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2091 if (Reg->getReg() == AMDGPU::ALU_CONST) {
Matt Arsenaultb3ee3882014-05-12 19:26:38 +00002092 ConstantSDNode *Cst
2093 = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002094 Consts.push_back(Cst->getZExtValue());
2095 }
2096 }
2097 }
2098
Matt Arsenault37c12d72014-05-12 20:42:57 +00002099 ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002100 Consts.push_back(Cst->getZExtValue());
2101 if (!TII->fitsConstReadLimitations(Consts)) {
2102 return false;
2103 }
2104
2105 Sel = CstOffset;
2106 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
2107 return true;
2108 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002109 case AMDGPU::MOV_IMM_I32:
2110 case AMDGPU::MOV_IMM_F32: {
2111 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
2112 uint64_t ImmValue = 0;
2113
2114
2115 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
2116 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
2117 float FloatValue = FPC->getValueAPF().convertToFloat();
2118 if (FloatValue == 0.0) {
2119 ImmReg = AMDGPU::ZERO;
2120 } else if (FloatValue == 0.5) {
2121 ImmReg = AMDGPU::HALF;
2122 } else if (FloatValue == 1.0) {
2123 ImmReg = AMDGPU::ONE;
2124 } else {
2125 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2126 }
2127 } else {
2128 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
2129 uint64_t Value = C->getZExtValue();
2130 if (Value == 0) {
2131 ImmReg = AMDGPU::ZERO;
2132 } else if (Value == 1) {
2133 ImmReg = AMDGPU::ONE_INT;
2134 } else {
2135 ImmValue = Value;
2136 }
2137 }
2138
2139 // Check that we aren't already using an immediate.
2140 // XXX: It's possible for an instruction to have more than one
2141 // immediate operand, but this is not supported yet.
2142 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
2143 if (!Imm.getNode())
2144 return false;
2145 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
2146 assert(C);
2147 if (C->getZExtValue())
2148 return false;
2149 Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
2150 }
2151 Src = DAG.getRegister(ImmReg, MVT::i32);
2152 return true;
2153 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002154 default:
2155 return false;
2156 }
2157}
2158
2159
2160/// \brief Fold the instructions after selecting them
2161SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2162 SelectionDAG &DAG) const {
2163 const R600InstrInfo *TII =
2164 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
2165 if (!Node->isMachineOpcode())
2166 return Node;
2167 unsigned Opcode = Node->getMachineOpcode();
2168 SDValue FakeOp;
2169
2170 std::vector<SDValue> Ops;
2171 for(SDNode::op_iterator I = Node->op_begin(), E = Node->op_end();
2172 I != E; ++I)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00002173 Ops.push_back(*I);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002174
2175 if (Opcode == AMDGPU::DOT_4) {
2176 int OperandIdx[] = {
2177 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2178 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2179 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2180 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2181 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2182 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2183 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2184 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00002185 };
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002186 int NegIdx[] = {
2187 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
2188 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
2189 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
2190 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
2191 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
2192 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
2193 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
2194 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
2195 };
2196 int AbsIdx[] = {
2197 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
2198 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
2199 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
2200 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
2201 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
2202 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
2203 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
2204 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
2205 };
2206 for (unsigned i = 0; i < 8; i++) {
2207 if (OperandIdx[i] < 0)
2208 return Node;
2209 SDValue &Src = Ops[OperandIdx[i] - 1];
2210 SDValue &Neg = Ops[NegIdx[i] - 1];
2211 SDValue &Abs = Ops[AbsIdx[i] - 1];
2212 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2213 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2214 if (HasDst)
2215 SelIdx--;
2216 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002217 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2218 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2219 }
2220 } else if (Opcode == AMDGPU::REG_SEQUENCE) {
2221 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2222 SDValue &Src = Ops[i];
2223 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002224 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2225 }
Vincent Lejeune0167a312013-09-12 23:45:00 +00002226 } else if (Opcode == AMDGPU::CLAMP_R600) {
2227 SDValue Src = Node->getOperand(0);
2228 if (!Src.isMachineOpcode() ||
2229 !TII->hasInstrModifiers(Src.getMachineOpcode()))
2230 return Node;
2231 int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
2232 AMDGPU::OpName::clamp);
2233 if (ClampIdx < 0)
2234 return Node;
2235 std::vector<SDValue> Ops;
2236 unsigned NumOp = Src.getNumOperands();
2237 for(unsigned i = 0; i < NumOp; ++i)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00002238 Ops.push_back(Src.getOperand(i));
Vincent Lejeune0167a312013-09-12 23:45:00 +00002239 Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
2240 return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
2241 Node->getVTList(), Ops);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002242 } else {
2243 if (!TII->hasInstrModifiers(Opcode))
2244 return Node;
2245 int OperandIdx[] = {
2246 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2247 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2248 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
2249 };
2250 int NegIdx[] = {
2251 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
2252 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
2253 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
2254 };
2255 int AbsIdx[] = {
2256 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
2257 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
2258 -1
2259 };
2260 for (unsigned i = 0; i < 3; i++) {
2261 if (OperandIdx[i] < 0)
2262 return Node;
2263 SDValue &Src = Ops[OperandIdx[i] - 1];
2264 SDValue &Neg = Ops[NegIdx[i] - 1];
2265 SDValue FakeAbs;
2266 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2267 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2268 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002269 int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
2270 if (HasDst) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002271 SelIdx--;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002272 ImmIdx--;
2273 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002274 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002275 SDValue &Imm = Ops[ImmIdx];
2276 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002277 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2278 }
2279 }
2280
2281 return Node;
2282}