blob: 87238d6156b9e096d4c20f7317c2a8e64901d2cf [file] [log] [blame]
Tom Stellard75aadc22012-12-11 21:25:42 +00001//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Custom DAG lowering for R600
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000016#include "AMDILIntrinsicInfo.h"
17#include "AMDGPUFrameLowering.h"
18#include "AMDGPUSubtarget.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000019#include "R600Defines.h"
20#include "R600InstrInfo.h"
21#include "R600MachineFunctionInfo.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000022#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000023#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000024#include "llvm/CodeGen/MachineInstrBuilder.h"
25#include "llvm/CodeGen/MachineRegisterInfo.h"
26#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000027#include "llvm/IR/Argument.h"
28#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000029
30using namespace llvm;
31
32R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000033 AMDGPUTargetLowering(TM),
34 Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000035 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
36 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
37 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
38 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000039 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
40 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
41
Tom Stellard75aadc22012-12-11 21:25:42 +000042 computeRegisterProperties();
43
Tom Stellard0351ea22013-09-28 02:50:50 +000044 // Set condition code actions
45 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
46 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000047 setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000048 setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000049 setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
50 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000051 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
52 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
53 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
54 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000055 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
56 setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
57
58 setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
59 setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
60 setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
61 setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
62
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000063 setOperationAction(ISD::FCOS, MVT::f32, Custom);
64 setOperationAction(ISD::FSIN, MVT::f32, Custom);
65
Tom Stellard75aadc22012-12-11 21:25:42 +000066 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000067 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000068
Tom Stellard492ebea2013-03-08 15:37:07 +000069 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
70 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000071
72 setOperationAction(ISD::FSUB, MVT::f32, Expand);
73
74 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
75 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
76 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000077
Tom Stellard75aadc22012-12-11 21:25:42 +000078 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
79 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
80
Tom Stellarde8f9f282013-03-08 15:37:05 +000081 setOperationAction(ISD::SETCC, MVT::i32, Expand);
82 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000083 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
84
Tom Stellard53f2f902013-09-05 18:38:03 +000085 setOperationAction(ISD::SELECT, MVT::i32, Expand);
86 setOperationAction(ISD::SELECT, MVT::f32, Expand);
87 setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
Tom Stellard53f2f902013-09-05 18:38:03 +000088 setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000089
Matt Arsenault4e466652014-04-16 01:41:30 +000090 // Expand sign extension of vectors
91 if (!Subtarget->hasBFE())
92 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
93
94 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
95 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);
96
97 if (!Subtarget->hasBFE())
98 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
99 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
100 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);
101
102 if (!Subtarget->hasBFE())
103 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
104 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
105 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);
106
107 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
108 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
109 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);
110
111 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
112
113
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000114 // Legalize loads and stores to the private address space.
115 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000116 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000117 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000118
119 // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
120 // spaces, so it is custom lowered to handle those where it isn't.
Tom Stellard1e803092013-07-23 01:48:18 +0000121 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
122 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
123 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
124 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000125 setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
126 setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
127
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000128 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000129 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000130 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000131 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
Tom Stellardd3ee8c12013-08-16 01:12:06 +0000132 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
133 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000134
Tom Stellard365366f2013-01-23 02:09:06 +0000135 setOperationAction(ISD::LOAD, MVT::i32, Custom);
136 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000137 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
138
Tom Stellard75aadc22012-12-11 21:25:42 +0000139 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +0000140 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +0000141 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +0000142 setTargetDAGCombine(ISD::SELECT_CC);
Quentin Colombete2e05482013-07-30 00:27:16 +0000143 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000144
Tom Stellard5f337882014-04-29 23:12:43 +0000145 // These should be replaced by UDVIREM, but it does not happen automatically
146 // during Type Legalization
147 setOperationAction(ISD::UDIV, MVT::i64, Custom);
148 setOperationAction(ISD::UREM, MVT::i64, Custom);
149
Michel Danzer49812b52013-07-10 16:37:07 +0000150 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
151
Tom Stellardb852af52013-03-08 15:37:03 +0000152 setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard87047f62013-04-24 23:56:18 +0000153 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
Tom Stellardfc455472013-08-12 22:33:21 +0000154 setSchedulingPreference(Sched::Source);
Tom Stellard75aadc22012-12-11 21:25:42 +0000155}
156
/// Expand pseudo-instructions that were marked usesCustomInserter into real
/// machine instructions after instruction selection.  Most cases build the
/// replacement in front of \p MI and then erase \p MI at the bottom of the
/// function; cases that must keep \p MI (or bail out unchanged) return BB
/// early instead.
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
          static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // Replace LDS_*_RET instructions that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI->getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // If the result register is actually used, keep the _RET form.
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()))
        return BB;

      // Rebuild as the NORET opcode, copying every operand except dst.
      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  // CLAMP/FABS/FNEG pseudos become a plain MOV carrying the matching
  // source/result modifier flag.
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                   AMDGPU::MOV,
                                                   MI->getOperand(0).getReg(),
                                                   MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    // Flag the instruction that defines the masked register instead of
    // emitting anything for the MASK_WRITE itself.
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    // Materialize the FP constant via its raw bit pattern.
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    // Copy from the constant file: a MOV reading ALU_CONST with the
    // constant slot encoded in the src0_sel operand.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
        MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    // If this store is immediately followed by the function's RETURN, set
    // the End-Of-Program bit on it.
    unsigned EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    // Texture sample with explicit derivatives: load the H/V gradients into
    // two temporary 128-bit registers, then issue TEX_SAMPLE_G which reads
    // them implicitly.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    // Source swizzle (SrcX..SrcW) and coordinate-type flags (CTX..CTW);
    // adjusted per texture target below.
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    // Same as TXD above but emits the comparison (shadow) sampler opcode
    // TEX_SAMPLE_C_G.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    // Conditional branch: set PREDICATE_BIT with a PRED_X compare-to-zero,
    // then jump on it.  MO_FLAG_PUSH makes PRED_X push onto the CF stack.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    // Integer variant of the conditional branch above.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
            AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO_INT)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
           .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if it's not the last one of its type.
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    // Scan forward for another export of the same type in this block.
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // CF instruction encoding differs between Evergreen (84) and R600 (40).
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    // Keep the (now augmented) RETURN itself: return before the erase below.
    return BB;
  }
  }

  // Common exit: the pseudo has been replaced; remove it.
  MI->eraseFromParent();
  return BB;
}
533
534//===----------------------------------------------------------------------===//
535// Custom DAG Lowering Operations
536//===----------------------------------------------------------------------===//
537
Tom Stellard75aadc22012-12-11 21:25:42 +0000538SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000539 MachineFunction &MF = DAG.getMachineFunction();
540 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard75aadc22012-12-11 21:25:42 +0000541 switch (Op.getOpcode()) {
542 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000543 case ISD::FCOS:
544 case ISD::FSIN: return LowerTrig(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000545 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000546 case ISD::STORE: return LowerSTORE(Op, DAG);
Tom Stellard365366f2013-01-23 02:09:06 +0000547 case ISD::LOAD: return LowerLOAD(Op, DAG);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000548 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000549 case ISD::INTRINSIC_VOID: {
550 SDValue Chain = Op.getOperand(0);
551 unsigned IntrinsicID =
552 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
553 switch (IntrinsicID) {
554 case AMDGPUIntrinsic::AMDGPU_store_output: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000555 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
556 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000557 MFI->LiveOuts.push_back(Reg);
Andrew Trickef9de2a2013-05-25 02:42:55 +0000558 return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
Tom Stellard75aadc22012-12-11 21:25:42 +0000559 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000560 case AMDGPUIntrinsic::R600_store_swizzle: {
561 const SDValue Args[8] = {
562 Chain,
563 Op.getOperand(2), // Export Value
564 Op.getOperand(3), // ArrayBase
565 Op.getOperand(4), // Type
566 DAG.getConstant(0, MVT::i32), // SWZ_X
567 DAG.getConstant(1, MVT::i32), // SWZ_Y
568 DAG.getConstant(2, MVT::i32), // SWZ_Z
569 DAG.getConstant(3, MVT::i32) // SWZ_W
570 };
Craig Topper48d114b2014-04-26 18:35:24 +0000571 return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(), Args);
Tom Stellard75aadc22012-12-11 21:25:42 +0000572 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000573
Tom Stellard75aadc22012-12-11 21:25:42 +0000574 // default for switch(IntrinsicID)
575 default: break;
576 }
577 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
578 break;
579 }
580 case ISD::INTRINSIC_WO_CHAIN: {
581 unsigned IntrinsicID =
582 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
583 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000584 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000585 switch(IntrinsicID) {
586 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000587 case AMDGPUIntrinsic::R600_load_input: {
588 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
589 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
590 MachineFunction &MF = DAG.getMachineFunction();
591 MachineRegisterInfo &MRI = MF.getRegInfo();
592 MRI.addLiveIn(Reg);
593 return DAG.getCopyFromReg(DAG.getEntryNode(),
594 SDLoc(DAG.getEntryNode()), Reg, VT);
595 }
596
597 case AMDGPUIntrinsic::R600_interp_input: {
598 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
599 int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
600 MachineSDNode *interp;
601 if (ijb < 0) {
602 const MachineFunction &MF = DAG.getMachineFunction();
603 const R600InstrInfo *TII =
604 static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
605 interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
606 MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
607 return DAG.getTargetExtractSubreg(
608 TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
609 DL, MVT::f32, SDValue(interp, 0));
610 }
611 MachineFunction &MF = DAG.getMachineFunction();
612 MachineRegisterInfo &MRI = MF.getRegInfo();
613 unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
614 unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
615 MRI.addLiveIn(RegisterI);
616 MRI.addLiveIn(RegisterJ);
617 SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
618 SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
619 SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
620 SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);
621
622 if (slot % 4 < 2)
623 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
624 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
625 RegisterJNode, RegisterINode);
626 else
627 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
628 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
629 RegisterJNode, RegisterINode);
630 return SDValue(interp, slot % 2);
631 }
Vincent Lejeunef143af32013-11-11 22:10:24 +0000632 case AMDGPUIntrinsic::R600_interp_xy:
633 case AMDGPUIntrinsic::R600_interp_zw: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000634 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Tom Stellard41afe6a2013-02-05 17:09:14 +0000635 MachineSDNode *interp;
Vincent Lejeunef143af32013-11-11 22:10:24 +0000636 SDValue RegisterINode = Op.getOperand(2);
637 SDValue RegisterJNode = Op.getOperand(3);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000638
Vincent Lejeunef143af32013-11-11 22:10:24 +0000639 if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
Tom Stellard41afe6a2013-02-05 17:09:14 +0000640 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
Vincent Lejeunef143af32013-11-11 22:10:24 +0000641 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000642 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000643 else
644 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
Vincent Lejeunef143af32013-11-11 22:10:24 +0000645 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000646 RegisterJNode, RegisterINode);
Vincent Lejeunef143af32013-11-11 22:10:24 +0000647 return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
648 SDValue(interp, 0), SDValue(interp, 1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000649 }
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000650 case AMDGPUIntrinsic::R600_tex:
651 case AMDGPUIntrinsic::R600_texc:
652 case AMDGPUIntrinsic::R600_txl:
653 case AMDGPUIntrinsic::R600_txlc:
654 case AMDGPUIntrinsic::R600_txb:
655 case AMDGPUIntrinsic::R600_txbc:
656 case AMDGPUIntrinsic::R600_txf:
657 case AMDGPUIntrinsic::R600_txq:
658 case AMDGPUIntrinsic::R600_ddx:
Vincent Lejeune6df39432013-10-02 16:00:33 +0000659 case AMDGPUIntrinsic::R600_ddy:
660 case AMDGPUIntrinsic::R600_ldptr: {
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000661 unsigned TextureOp;
662 switch (IntrinsicID) {
663 case AMDGPUIntrinsic::R600_tex:
664 TextureOp = 0;
665 break;
666 case AMDGPUIntrinsic::R600_texc:
667 TextureOp = 1;
668 break;
669 case AMDGPUIntrinsic::R600_txl:
670 TextureOp = 2;
671 break;
672 case AMDGPUIntrinsic::R600_txlc:
673 TextureOp = 3;
674 break;
675 case AMDGPUIntrinsic::R600_txb:
676 TextureOp = 4;
677 break;
678 case AMDGPUIntrinsic::R600_txbc:
679 TextureOp = 5;
680 break;
681 case AMDGPUIntrinsic::R600_txf:
682 TextureOp = 6;
683 break;
684 case AMDGPUIntrinsic::R600_txq:
685 TextureOp = 7;
686 break;
687 case AMDGPUIntrinsic::R600_ddx:
688 TextureOp = 8;
689 break;
690 case AMDGPUIntrinsic::R600_ddy:
691 TextureOp = 9;
692 break;
Vincent Lejeune6df39432013-10-02 16:00:33 +0000693 case AMDGPUIntrinsic::R600_ldptr:
694 TextureOp = 10;
695 break;
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000696 default:
697 llvm_unreachable("Unknow Texture Operation");
698 }
699
700 SDValue TexArgs[19] = {
701 DAG.getConstant(TextureOp, MVT::i32),
702 Op.getOperand(1),
703 DAG.getConstant(0, MVT::i32),
704 DAG.getConstant(1, MVT::i32),
705 DAG.getConstant(2, MVT::i32),
706 DAG.getConstant(3, MVT::i32),
707 Op.getOperand(2),
708 Op.getOperand(3),
709 Op.getOperand(4),
710 DAG.getConstant(0, MVT::i32),
711 DAG.getConstant(1, MVT::i32),
712 DAG.getConstant(2, MVT::i32),
713 DAG.getConstant(3, MVT::i32),
714 Op.getOperand(5),
715 Op.getOperand(6),
716 Op.getOperand(7),
717 Op.getOperand(8),
718 Op.getOperand(9),
719 Op.getOperand(10)
720 };
Craig Topper48d114b2014-04-26 18:35:24 +0000721 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000722 }
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000723 case AMDGPUIntrinsic::AMDGPU_dp4: {
724 SDValue Args[8] = {
725 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
726 DAG.getConstant(0, MVT::i32)),
727 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
728 DAG.getConstant(0, MVT::i32)),
729 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
730 DAG.getConstant(1, MVT::i32)),
731 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
732 DAG.getConstant(1, MVT::i32)),
733 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
734 DAG.getConstant(2, MVT::i32)),
735 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
736 DAG.getConstant(2, MVT::i32)),
737 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
738 DAG.getConstant(3, MVT::i32)),
739 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
740 DAG.getConstant(3, MVT::i32))
741 };
Craig Topper48d114b2014-04-26 18:35:24 +0000742 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000743 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000744
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000745 case Intrinsic::r600_read_ngroups_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000746 return LowerImplicitParameter(DAG, VT, DL, 0);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000747 case Intrinsic::r600_read_ngroups_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000748 return LowerImplicitParameter(DAG, VT, DL, 1);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000749 case Intrinsic::r600_read_ngroups_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000750 return LowerImplicitParameter(DAG, VT, DL, 2);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000751 case Intrinsic::r600_read_global_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000752 return LowerImplicitParameter(DAG, VT, DL, 3);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000753 case Intrinsic::r600_read_global_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000754 return LowerImplicitParameter(DAG, VT, DL, 4);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000755 case Intrinsic::r600_read_global_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000756 return LowerImplicitParameter(DAG, VT, DL, 5);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000757 case Intrinsic::r600_read_local_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000758 return LowerImplicitParameter(DAG, VT, DL, 6);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000759 case Intrinsic::r600_read_local_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000760 return LowerImplicitParameter(DAG, VT, DL, 7);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000761 case Intrinsic::r600_read_local_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000762 return LowerImplicitParameter(DAG, VT, DL, 8);
763
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000764 case Intrinsic::r600_read_tgid_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000765 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
766 AMDGPU::T1_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000767 case Intrinsic::r600_read_tgid_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000768 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
769 AMDGPU::T1_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000770 case Intrinsic::r600_read_tgid_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000771 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
772 AMDGPU::T1_Z, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000773 case Intrinsic::r600_read_tidig_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000774 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
775 AMDGPU::T0_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000776 case Intrinsic::r600_read_tidig_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000777 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
778 AMDGPU::T0_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000779 case Intrinsic::r600_read_tidig_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000780 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
781 AMDGPU::T0_Z, VT);
782 }
783 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
784 break;
785 }
786 } // end switch(Op.getOpcode())
787 return SDValue();
788}
789
790void R600TargetLowering::ReplaceNodeResults(SDNode *N,
791 SmallVectorImpl<SDValue> &Results,
792 SelectionDAG &DAG) const {
793 switch (N->getOpcode()) {
Matt Arsenaultd125d742014-03-27 17:23:24 +0000794 default:
795 AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
796 return;
Tom Stellard75aadc22012-12-11 21:25:42 +0000797 case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
Tom Stellard365366f2013-01-23 02:09:06 +0000798 return;
799 case ISD::LOAD: {
800 SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
801 Results.push_back(SDValue(Node, 0));
802 Results.push_back(SDValue(Node, 1));
803 // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
804 // function
805 DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
806 return;
807 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000808 case ISD::STORE:
809 SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
810 Results.push_back(SDValue(Node, 0));
811 return;
Tom Stellard75aadc22012-12-11 21:25:42 +0000812 }
813}
814
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000815SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
816 // On hw >= R700, COS/SIN input must be between -1. and 1.
817 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
818 EVT VT = Op.getValueType();
819 SDValue Arg = Op.getOperand(0);
820 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
821 DAG.getNode(ISD::FADD, SDLoc(Op), VT,
822 DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
823 DAG.getConstantFP(0.15915494309, MVT::f32)),
824 DAG.getConstantFP(0.5, MVT::f32)));
825 unsigned TrigNode;
826 switch (Op.getOpcode()) {
827 case ISD::FCOS:
828 TrigNode = AMDGPUISD::COS_HW;
829 break;
830 case ISD::FSIN:
831 TrigNode = AMDGPUISD::SIN_HW;
832 break;
833 default:
834 llvm_unreachable("Wrong trig opcode");
835 }
836 SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
837 DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
838 DAG.getConstantFP(-0.5, MVT::f32)));
839 if (Gen >= AMDGPUSubtarget::R700)
840 return TrigVal;
841 // On R600 hw, COS/SIN input must be between -Pi and Pi.
842 return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
843 DAG.getConstantFP(3.14159265359, MVT::f32));
844}
845
Tom Stellard75aadc22012-12-11 21:25:42 +0000846SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
847 return DAG.getNode(
848 ISD::SETCC,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000849 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +0000850 MVT::i1,
851 Op, DAG.getConstantFP(0.0f, MVT::f32),
852 DAG.getCondCode(ISD::SETNE)
853 );
854}
855
Tom Stellard75aadc22012-12-11 21:25:42 +0000856SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000857 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +0000858 unsigned DwordOffset) const {
859 unsigned ByteOffset = DwordOffset * 4;
860 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +0000861 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +0000862
863 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
864 assert(isInt<16>(ByteOffset));
865
866 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
867 DAG.getConstant(ByteOffset, MVT::i32), // PTR
868 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
869 false, false, false, 0);
870}
871
Tom Stellard75aadc22012-12-11 21:25:42 +0000872bool R600TargetLowering::isZero(SDValue Op) const {
873 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
874 return Cst->isNullValue();
875 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
876 return CstFP->isZero();
877 } else {
878 return false;
879 }
880}
881
// Custom lowering for SELECT_CC.  Tries, in order:
//   1. A SET* instruction (True/False are the hardware true/false values).
//   2. A CND* instruction (comparison against zero).
//   3. Expansion into two nested SELECT_CC nodes that each match one of the
//      native forms above.
// The swap/invert dances below rewrite the operands so one of the native
// patterns applies without changing the selected value.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  // If True/False arrive reversed (hw-false in the True slot), invert the
  // condition code — or invert-and-swap the compare operands — so that the
  // hw-true value lands in the True slot and a legal condition remains.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
     ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the conditon and then swapping the operands
      // (inverting also requires swapping True/False to preserve the result).
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types.  This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    // CND* has no "not equal" form; fold the negation into a True/False swap.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this for it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
1016
Alp Tokercb402912014-01-24 17:20:08 +00001017/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001018/// convert these pointers to a register index. Each register holds
1019/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
1020/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
1021/// for indirect addressing.
1022SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1023 unsigned StackWidth,
1024 SelectionDAG &DAG) const {
1025 unsigned SRLPad;
1026 switch(StackWidth) {
1027 case 1:
1028 SRLPad = 2;
1029 break;
1030 case 2:
1031 SRLPad = 3;
1032 break;
1033 case 4:
1034 SRLPad = 4;
1035 break;
1036 default: llvm_unreachable("Invalid stack width");
1037 }
1038
Andrew Trickef9de2a2013-05-25 02:42:55 +00001039 return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001040 DAG.getConstant(SRLPad, MVT::i32));
1041}
1042
1043void R600TargetLowering::getStackAddress(unsigned StackWidth,
1044 unsigned ElemIdx,
1045 unsigned &Channel,
1046 unsigned &PtrIncr) const {
1047 switch (StackWidth) {
1048 default:
1049 case 1:
1050 Channel = 0;
1051 if (ElemIdx > 0) {
1052 PtrIncr = 1;
1053 } else {
1054 PtrIncr = 0;
1055 }
1056 break;
1057 case 2:
1058 Channel = ElemIdx % 2;
1059 if (ElemIdx == 2) {
1060 PtrIncr = 1;
1061 } else {
1062 PtrIncr = 0;
1063 }
1064 break;
1065 case 4:
1066 Channel = ElemIdx;
1067 PtrIncr = 0;
1068 break;
1069 }
1070}
1071
Tom Stellard75aadc22012-12-11 21:25:42 +00001072SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001073 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001074 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1075 SDValue Chain = Op.getOperand(0);
1076 SDValue Value = Op.getOperand(1);
1077 SDValue Ptr = Op.getOperand(2);
1078
Tom Stellard2ffc3302013-08-26 15:05:44 +00001079 SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
Tom Stellardfbab8272013-08-16 01:12:11 +00001080 if (Result.getNode()) {
1081 return Result;
1082 }
1083
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001084 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
1085 if (StoreNode->isTruncatingStore()) {
1086 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001087 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001088 EVT MemVT = StoreNode->getMemoryVT();
1089 SDValue MaskConstant;
1090 if (MemVT == MVT::i8) {
1091 MaskConstant = DAG.getConstant(0xFF, MVT::i32);
1092 } else {
1093 assert(MemVT == MVT::i16);
1094 MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
1095 }
1096 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
1097 DAG.getConstant(2, MVT::i32));
1098 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
1099 DAG.getConstant(0x00000003, VT));
1100 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1101 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
1102 DAG.getConstant(3, VT));
1103 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1104 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
1105 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1106 // vector instead.
1107 SDValue Src[4] = {
1108 ShiftedValue,
1109 DAG.getConstant(0, MVT::i32),
1110 DAG.getConstant(0, MVT::i32),
1111 Mask
1112 };
Craig Topper48d114b2014-04-26 18:35:24 +00001113 SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001114 SDValue Args[3] = { Chain, Input, DWordAddr };
1115 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
Craig Topper206fcd42014-04-26 19:29:41 +00001116 Op->getVTList(), Args, MemVT,
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001117 StoreNode->getMemOperand());
1118 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
1119 Value.getValueType().bitsGE(MVT::i32)) {
1120 // Convert pointer from byte address to dword address.
1121 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1122 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
1123 Ptr, DAG.getConstant(2, MVT::i32)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001124
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001125 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
Matt Arsenaulteaa3a7e2013-12-10 21:37:42 +00001126 llvm_unreachable("Truncated and indexed stores not supported yet");
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001127 } else {
1128 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1129 }
1130 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001131 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001132 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001133
1134 EVT ValueVT = Value.getValueType();
1135
1136 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1137 return SDValue();
1138 }
1139
Tom Stellarde9373602014-01-22 19:24:14 +00001140 SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
1141 if (Ret.getNode()) {
1142 return Ret;
1143 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001144 // Lowering for indirect addressing
1145
1146 const MachineFunction &MF = DAG.getMachineFunction();
1147 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1148 getTargetMachine().getFrameLowering());
1149 unsigned StackWidth = TFL->getStackWidth(MF);
1150
1151 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1152
1153 if (ValueVT.isVector()) {
1154 unsigned NumElemVT = ValueVT.getVectorNumElements();
1155 EVT ElemVT = ValueVT.getVectorElementType();
Craig Topper48d114b2014-04-26 18:35:24 +00001156 SmallVector<SDValue, 4> Stores(NumElemVT);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001157
1158 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1159 "vector width in load");
1160
1161 for (unsigned i = 0; i < NumElemVT; ++i) {
1162 unsigned Channel, PtrIncr;
1163 getStackAddress(StackWidth, i, Channel, PtrIncr);
1164 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1165 DAG.getConstant(PtrIncr, MVT::i32));
1166 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
1167 Value, DAG.getConstant(i, MVT::i32));
1168
1169 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1170 Chain, Elem, Ptr,
1171 DAG.getTargetConstant(Channel, MVT::i32));
1172 }
Craig Topper48d114b2014-04-26 18:35:24 +00001173 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001174 } else {
1175 if (ValueVT == MVT::i8) {
1176 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1177 }
1178 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
NAKAMURA Takumi18ca09c2013-05-22 06:37:25 +00001179 DAG.getTargetConstant(0, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001180 }
1181
1182 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001183}
1184
Tom Stellard365366f2013-01-23 02:09:06 +00001185// return (512 + (kc_bank << 12)
1186static int
1187ConstantAddressBlock(unsigned AddressSpace) {
1188 switch (AddressSpace) {
1189 case AMDGPUAS::CONSTANT_BUFFER_0:
1190 return 512;
1191 case AMDGPUAS::CONSTANT_BUFFER_1:
1192 return 512 + 4096;
1193 case AMDGPUAS::CONSTANT_BUFFER_2:
1194 return 512 + 4096 * 2;
1195 case AMDGPUAS::CONSTANT_BUFFER_3:
1196 return 512 + 4096 * 3;
1197 case AMDGPUAS::CONSTANT_BUFFER_4:
1198 return 512 + 4096 * 4;
1199 case AMDGPUAS::CONSTANT_BUFFER_5:
1200 return 512 + 4096 * 5;
1201 case AMDGPUAS::CONSTANT_BUFFER_6:
1202 return 512 + 4096 * 6;
1203 case AMDGPUAS::CONSTANT_BUFFER_7:
1204 return 512 + 4096 * 7;
1205 case AMDGPUAS::CONSTANT_BUFFER_8:
1206 return 512 + 4096 * 8;
1207 case AMDGPUAS::CONSTANT_BUFFER_9:
1208 return 512 + 4096 * 9;
1209 case AMDGPUAS::CONSTANT_BUFFER_10:
1210 return 512 + 4096 * 10;
1211 case AMDGPUAS::CONSTANT_BUFFER_11:
1212 return 512 + 4096 * 11;
1213 case AMDGPUAS::CONSTANT_BUFFER_12:
1214 return 512 + 4096 * 12;
1215 case AMDGPUAS::CONSTANT_BUFFER_13:
1216 return 512 + 4096 * 13;
1217 case AMDGPUAS::CONSTANT_BUFFER_14:
1218 return 512 + 4096 * 14;
1219 case AMDGPUAS::CONSTANT_BUFFER_15:
1220 return 512 + 4096 * 15;
1221 default:
1222 return -1;
1223 }
1224}
1225
// Custom lowering for loads.  Handles, in order:
//   - whatever the generic AMDGPU lowering accepts,
//   - vector loads from local memory (split in half),
//   - loads from the constant buffers (folded to CONST_ADDRESS when the
//     pointer is a compile-time constant),
//   - SEXT loads outside CONSTANT_BUFFER_0 (expanded to extload + shl/sra),
//   - private-address loads via REGISTER_LOAD indirect addressing.
// Returns SDValue() for loads this hook does not handle.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  // Give the generic AMDGPU lowering the first chance.
  SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
  if (Ret.getNode()) {
    SDValue Ops[2] = {
      Ret,
      Chain
    };
    return DAG.getMergeValues(Ops, DL);
  }


  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      SplitVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    // A constant pointer lets us fold each lane to a CONST_ADDRESS node.
    if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
        isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT,
                           makeArrayRef(Slots, NumElements));
    } else {
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    // Scalar loads only want lane 0 of the v4 result.
    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    // Sign-extend via an any-extending load followed by shl/sra by the
    // width difference.
    SDValue ShiftAmount =
          DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                  LoadNode->getPointerInfo(), MemVT,
                                  LoadNode->isVolatile(),
                                  LoadNode->isNonTemporal(),
                                  LoadNode->getAlignment());
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);

    SDValue MergedValues[2] = { Sra, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    // One REGISTER_LOAD per element; unused lanes are filled with undef.
    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, MVT::i32),
                             Op.getOperand(2));
    }
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2] = {
    LoweredLoad,
    Chain
  };

  return DAG.getMergeValues(Ops, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001374
Tom Stellard75aadc22012-12-11 21:25:42 +00001375/// XXX Only kernel functions are supported, so we can assume for now that
1376/// every function is a kernel function, but in the future we should use
1377/// separate calling conventions for kernel and non-kernel functions.
1378SDValue R600TargetLowering::LowerFormalArguments(
1379 SDValue Chain,
1380 CallingConv::ID CallConv,
1381 bool isVarArg,
1382 const SmallVectorImpl<ISD::InputArg> &Ins,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001383 SDLoc DL, SelectionDAG &DAG,
Tom Stellard75aadc22012-12-11 21:25:42 +00001384 SmallVectorImpl<SDValue> &InVals) const {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001385 SmallVector<CCValAssign, 16> ArgLocs;
1386 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
1387 getTargetMachine(), ArgLocs, *DAG.getContext());
Vincent Lejeunef143af32013-11-11 22:10:24 +00001388 MachineFunction &MF = DAG.getMachineFunction();
1389 unsigned ShaderType = MF.getInfo<R600MachineFunctionInfo>()->ShaderType;
Tom Stellardacfeebf2013-07-23 01:48:05 +00001390
Tom Stellardaf775432013-10-23 00:44:32 +00001391 SmallVector<ISD::InputArg, 8> LocalIns;
1392
Matt Arsenault209a7b92014-04-18 07:40:20 +00001393 getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);
Tom Stellardaf775432013-10-23 00:44:32 +00001394
1395 AnalyzeFormalArguments(CCInfo, LocalIns);
Tom Stellardacfeebf2013-07-23 01:48:05 +00001396
Tom Stellard1e803092013-07-23 01:48:18 +00001397 for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001398 CCValAssign &VA = ArgLocs[i];
Tom Stellardaf775432013-10-23 00:44:32 +00001399 EVT VT = Ins[i].VT;
1400 EVT MemVT = LocalIns[i].VT;
Tom Stellard78e01292013-07-23 01:47:58 +00001401
Vincent Lejeunef143af32013-11-11 22:10:24 +00001402 if (ShaderType != ShaderType::COMPUTE) {
1403 unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
1404 SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
1405 InVals.push_back(Register);
1406 continue;
1407 }
1408
Tom Stellard75aadc22012-12-11 21:25:42 +00001409 PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +00001410 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellardacfeebf2013-07-23 01:48:05 +00001411
Matt Arsenaultfae02982014-03-17 18:58:11 +00001412 // i64 isn't a legal type, so the register type used ends up as i32, which
1413 // isn't expected here. It attempts to create this sextload, but it ends up
1414 // being invalid. Somehow this seems to work with i64 arguments, but breaks
1415 // for <1 x i64>.
1416
Tom Stellardacfeebf2013-07-23 01:48:05 +00001417 // The first 36 bytes of the input buffer contains information about
1418 // thread group and global sizes.
Matt Arsenaulte1f030c2014-04-11 20:59:54 +00001419
1420 // FIXME: This should really check the extload type, but the handling of
1421 // extload vecto parameters seems to be broken.
1422 //ISD::LoadExtType Ext = Ins[i].Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
1423 ISD::LoadExtType Ext = ISD::SEXTLOAD;
1424 SDValue Arg = DAG.getExtLoad(Ext, DL, VT, Chain,
Tom Stellardaf775432013-10-23 00:44:32 +00001425 DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
1426 MachinePointerInfo(UndefValue::get(PtrTy)),
1427 MemVT, false, false, 4);
Matt Arsenault209a7b92014-04-18 07:40:20 +00001428
1429 // 4 is the preferred alignment for the CONSTANT memory space.
Tom Stellard75aadc22012-12-11 21:25:42 +00001430 InVals.push_back(Arg);
Tom Stellard75aadc22012-12-11 21:25:42 +00001431 }
1432 return Chain;
1433}
1434
Matt Arsenault758659232013-05-18 00:21:46 +00001435EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001436 if (!VT.isVector())
1437 return MVT::i32;
Tom Stellard75aadc22012-12-11 21:25:42 +00001438 return VT.changeVectorElementTypeToInteger();
1439}
1440
Matt Arsenault209a7b92014-04-18 07:40:20 +00001441static SDValue CompactSwizzlableVector(
1442 SelectionDAG &DAG, SDValue VectorEntry,
1443 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001444 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1445 assert(RemapSwizzle.empty());
1446 SDValue NewBldVec[4] = {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001447 VectorEntry.getOperand(0),
1448 VectorEntry.getOperand(1),
1449 VectorEntry.getOperand(2),
1450 VectorEntry.getOperand(3)
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001451 };
1452
1453 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeunefa58a5f2013-10-13 17:56:10 +00001454 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1455 // We mask write here to teach later passes that the ith element of this
1456 // vector is undef. Thus we can use it to reduce 128 bits reg usage,
1457 // break false dependencies and additionnaly make assembly easier to read.
1458 RemapSwizzle[i] = 7; // SEL_MASK_WRITE
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001459 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1460 if (C->isZero()) {
1461 RemapSwizzle[i] = 4; // SEL_0
1462 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1463 } else if (C->isExactlyValue(1.0)) {
1464 RemapSwizzle[i] = 5; // SEL_1
1465 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1466 }
1467 }
1468
1469 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1470 continue;
1471 for (unsigned j = 0; j < i; j++) {
1472 if (NewBldVec[i] == NewBldVec[j]) {
1473 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1474 RemapSwizzle[i] = j;
1475 break;
1476 }
1477 }
1478 }
1479
1480 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001481 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001482}
1483
Benjamin Kramer193960c2013-06-11 13:32:25 +00001484static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1485 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001486 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1487 assert(RemapSwizzle.empty());
1488 SDValue NewBldVec[4] = {
1489 VectorEntry.getOperand(0),
1490 VectorEntry.getOperand(1),
1491 VectorEntry.getOperand(2),
1492 VectorEntry.getOperand(3)
1493 };
1494 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001495 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001496 RemapSwizzle[i] = i;
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001497 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1498 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1499 ->getZExtValue();
1500 if (i == Idx)
1501 isUnmovable[Idx] = true;
1502 }
1503 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001504
1505 for (unsigned i = 0; i < 4; i++) {
1506 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1507 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1508 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001509 if (isUnmovable[Idx])
1510 continue;
1511 // Swap i and Idx
1512 std::swap(NewBldVec[Idx], NewBldVec[i]);
1513 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1514 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001515 }
1516 }
1517
1518 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001519 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001520}
1521
1522
1523SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1524SDValue Swz[4], SelectionDAG &DAG) const {
1525 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1526 // Old -> New swizzle values
1527 DenseMap<unsigned, unsigned> SwizzleRemap;
1528
1529 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1530 for (unsigned i = 0; i < 4; i++) {
1531 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1532 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1533 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1534 }
1535
1536 SwizzleRemap.clear();
1537 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1538 for (unsigned i = 0; i < 4; i++) {
1539 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1540 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1541 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1542 }
1543
1544 return BuildVector;
1545}
1546
1547
Tom Stellard75aadc22012-12-11 21:25:42 +00001548//===----------------------------------------------------------------------===//
1549// Custom DAG Optimizations
1550//===----------------------------------------------------------------------===//
1551
1552SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1553 DAGCombinerInfo &DCI) const {
1554 SelectionDAG &DAG = DCI.DAG;
1555
1556 switch (N->getOpcode()) {
Tom Stellard50122a52014-04-07 19:45:41 +00001557 default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00001558 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1559 case ISD::FP_ROUND: {
1560 SDValue Arg = N->getOperand(0);
1561 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001562 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001563 Arg.getOperand(0));
1564 }
1565 break;
1566 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001567
1568 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1569 // (i32 select_cc f32, f32, -1, 0 cc)
1570 //
1571 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1572 // this to one of the SET*_DX10 instructions.
1573 case ISD::FP_TO_SINT: {
1574 SDValue FNeg = N->getOperand(0);
1575 if (FNeg.getOpcode() != ISD::FNEG) {
1576 return SDValue();
1577 }
1578 SDValue SelectCC = FNeg.getOperand(0);
1579 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1580 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1581 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1582 !isHWTrueValue(SelectCC.getOperand(2)) ||
1583 !isHWFalseValue(SelectCC.getOperand(3))) {
1584 return SDValue();
1585 }
1586
Andrew Trickef9de2a2013-05-25 02:42:55 +00001587 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001588 SelectCC.getOperand(0), // LHS
1589 SelectCC.getOperand(1), // RHS
1590 DAG.getConstant(-1, MVT::i32), // True
1591 DAG.getConstant(0, MVT::i32), // Flase
1592 SelectCC.getOperand(4)); // CC
1593
1594 break;
1595 }
Quentin Colombete2e05482013-07-30 00:27:16 +00001596
NAKAMURA Takumi8a046432013-10-28 04:07:38 +00001597 // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1598 // => build_vector elt0, ... , NewEltIdx, ... , eltN
Quentin Colombete2e05482013-07-30 00:27:16 +00001599 case ISD::INSERT_VECTOR_ELT: {
1600 SDValue InVec = N->getOperand(0);
1601 SDValue InVal = N->getOperand(1);
1602 SDValue EltNo = N->getOperand(2);
1603 SDLoc dl(N);
1604
1605 // If the inserted element is an UNDEF, just use the input vector.
1606 if (InVal.getOpcode() == ISD::UNDEF)
1607 return InVec;
1608
1609 EVT VT = InVec.getValueType();
1610
1611 // If we can't generate a legal BUILD_VECTOR, exit
1612 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1613 return SDValue();
1614
1615 // Check that we know which element is being inserted
1616 if (!isa<ConstantSDNode>(EltNo))
1617 return SDValue();
1618 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1619
1620 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1621 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1622 // vector elements.
1623 SmallVector<SDValue, 8> Ops;
1624 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1625 Ops.append(InVec.getNode()->op_begin(),
1626 InVec.getNode()->op_end());
1627 } else if (InVec.getOpcode() == ISD::UNDEF) {
1628 unsigned NElts = VT.getVectorNumElements();
1629 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1630 } else {
1631 return SDValue();
1632 }
1633
1634 // Insert the element
1635 if (Elt < Ops.size()) {
1636 // All the operands of BUILD_VECTOR must have the same type;
1637 // we enforce that here.
1638 EVT OpVT = Ops[0].getValueType();
1639 if (InVal.getValueType() != OpVT)
1640 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1641 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
1642 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
1643 Ops[Elt] = InVal;
1644 }
1645
1646 // Return the new vector
Craig Topper48d114b2014-04-26 18:35:24 +00001647 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
Quentin Colombete2e05482013-07-30 00:27:16 +00001648 }
1649
Tom Stellard365366f2013-01-23 02:09:06 +00001650 // Extract_vec (Build_vector) generated by custom lowering
1651 // also needs to be customly combined
1652 case ISD::EXTRACT_VECTOR_ELT: {
1653 SDValue Arg = N->getOperand(0);
1654 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1655 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1656 unsigned Element = Const->getZExtValue();
1657 return Arg->getOperand(Element);
1658 }
1659 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001660 if (Arg.getOpcode() == ISD::BITCAST &&
1661 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1662 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1663 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001664 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00001665 Arg->getOperand(0).getOperand(Element));
1666 }
1667 }
Tom Stellard365366f2013-01-23 02:09:06 +00001668 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001669
1670 case ISD::SELECT_CC: {
Tom Stellardafa8b532014-05-09 16:42:16 +00001671 // Try common optimizations
1672 SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
1673 if (Ret.getNode())
1674 return Ret;
1675
Tom Stellarde06163a2013-02-07 14:02:35 +00001676 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1677 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001678 //
1679 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1680 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001681 SDValue LHS = N->getOperand(0);
1682 if (LHS.getOpcode() != ISD::SELECT_CC) {
1683 return SDValue();
1684 }
1685
1686 SDValue RHS = N->getOperand(1);
1687 SDValue True = N->getOperand(2);
1688 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001689 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001690
1691 if (LHS.getOperand(2).getNode() != True.getNode() ||
1692 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00001693 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00001694 return SDValue();
1695 }
1696
Tom Stellard5e524892013-03-08 15:37:11 +00001697 switch (NCC) {
1698 default: return SDValue();
1699 case ISD::SETNE: return LHS;
1700 case ISD::SETEQ: {
1701 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1702 LHSCC = ISD::getSetCCInverse(LHSCC,
1703 LHS.getOperand(0).getValueType().isInteger());
Tom Stellardcd428182013-09-28 02:50:38 +00001704 if (DCI.isBeforeLegalizeOps() ||
1705 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1706 return DAG.getSelectCC(SDLoc(N),
1707 LHS.getOperand(0),
1708 LHS.getOperand(1),
1709 LHS.getOperand(2),
1710 LHS.getOperand(3),
1711 LHSCC);
1712 break;
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001713 }
Tom Stellard5e524892013-03-08 15:37:11 +00001714 }
Tom Stellardcd428182013-09-28 02:50:38 +00001715 return SDValue();
Tom Stellard5e524892013-03-08 15:37:11 +00001716 }
Tom Stellardfbab8272013-08-16 01:12:11 +00001717
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001718 case AMDGPUISD::EXPORT: {
1719 SDValue Arg = N->getOperand(1);
1720 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1721 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001722
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001723 SDValue NewArgs[8] = {
1724 N->getOperand(0), // Chain
1725 SDValue(),
1726 N->getOperand(2), // ArrayBase
1727 N->getOperand(3), // Type
1728 N->getOperand(4), // SWZ_X
1729 N->getOperand(5), // SWZ_Y
1730 N->getOperand(6), // SWZ_Z
1731 N->getOperand(7) // SWZ_W
1732 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00001733 SDLoc DL(N);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001734 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
Craig Topper48d114b2014-04-26 18:35:24 +00001735 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
Tom Stellarde06163a2013-02-07 14:02:35 +00001736 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001737 case AMDGPUISD::TEXTURE_FETCH: {
1738 SDValue Arg = N->getOperand(1);
1739 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1740 break;
1741
1742 SDValue NewArgs[19] = {
1743 N->getOperand(0),
1744 N->getOperand(1),
1745 N->getOperand(2),
1746 N->getOperand(3),
1747 N->getOperand(4),
1748 N->getOperand(5),
1749 N->getOperand(6),
1750 N->getOperand(7),
1751 N->getOperand(8),
1752 N->getOperand(9),
1753 N->getOperand(10),
1754 N->getOperand(11),
1755 N->getOperand(12),
1756 N->getOperand(13),
1757 N->getOperand(14),
1758 N->getOperand(15),
1759 N->getOperand(16),
1760 N->getOperand(17),
1761 N->getOperand(18),
1762 };
1763 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
1764 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
Craig Topper48d114b2014-04-26 18:35:24 +00001765 NewArgs);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001766 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001767 }
Matt Arsenault5565f65e2014-05-22 18:09:07 +00001768
1769 return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00001770}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001771
1772static bool
1773FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001774 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001775 const R600InstrInfo *TII =
1776 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
1777 if (!Src.isMachineOpcode())
1778 return false;
1779 switch (Src.getMachineOpcode()) {
1780 case AMDGPU::FNEG_R600:
1781 if (!Neg.getNode())
1782 return false;
1783 Src = Src.getOperand(0);
1784 Neg = DAG.getTargetConstant(1, MVT::i32);
1785 return true;
1786 case AMDGPU::FABS_R600:
1787 if (!Abs.getNode())
1788 return false;
1789 Src = Src.getOperand(0);
1790 Abs = DAG.getTargetConstant(1, MVT::i32);
1791 return true;
1792 case AMDGPU::CONST_COPY: {
1793 unsigned Opcode = ParentNode->getMachineOpcode();
1794 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1795
1796 if (!Sel.getNode())
1797 return false;
1798
1799 SDValue CstOffset = Src.getOperand(0);
1800 if (ParentNode->getValueType(0).isVector())
1801 return false;
1802
1803 // Gather constants values
1804 int SrcIndices[] = {
1805 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
1806 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
1807 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
1808 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1809 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1810 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1811 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
1812 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
1813 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
1814 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
1815 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
1816 };
1817 std::vector<unsigned> Consts;
Matt Arsenault4d64f962014-05-12 19:23:21 +00001818 for (int OtherSrcIdx : SrcIndices) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001819 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
1820 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
1821 continue;
1822 if (HasDst) {
1823 OtherSrcIdx--;
1824 OtherSelIdx--;
1825 }
1826 if (RegisterSDNode *Reg =
1827 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
1828 if (Reg->getReg() == AMDGPU::ALU_CONST) {
Matt Arsenaultb3ee3882014-05-12 19:26:38 +00001829 ConstantSDNode *Cst
1830 = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001831 Consts.push_back(Cst->getZExtValue());
1832 }
1833 }
1834 }
1835
Matt Arsenault37c12d72014-05-12 20:42:57 +00001836 ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001837 Consts.push_back(Cst->getZExtValue());
1838 if (!TII->fitsConstReadLimitations(Consts)) {
1839 return false;
1840 }
1841
1842 Sel = CstOffset;
1843 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
1844 return true;
1845 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001846 case AMDGPU::MOV_IMM_I32:
1847 case AMDGPU::MOV_IMM_F32: {
1848 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
1849 uint64_t ImmValue = 0;
1850
1851
1852 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
1853 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
1854 float FloatValue = FPC->getValueAPF().convertToFloat();
1855 if (FloatValue == 0.0) {
1856 ImmReg = AMDGPU::ZERO;
1857 } else if (FloatValue == 0.5) {
1858 ImmReg = AMDGPU::HALF;
1859 } else if (FloatValue == 1.0) {
1860 ImmReg = AMDGPU::ONE;
1861 } else {
1862 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
1863 }
1864 } else {
1865 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
1866 uint64_t Value = C->getZExtValue();
1867 if (Value == 0) {
1868 ImmReg = AMDGPU::ZERO;
1869 } else if (Value == 1) {
1870 ImmReg = AMDGPU::ONE_INT;
1871 } else {
1872 ImmValue = Value;
1873 }
1874 }
1875
1876 // Check that we aren't already using an immediate.
1877 // XXX: It's possible for an instruction to have more than one
1878 // immediate operand, but this is not supported yet.
1879 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
1880 if (!Imm.getNode())
1881 return false;
1882 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
1883 assert(C);
1884 if (C->getZExtValue())
1885 return false;
1886 Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
1887 }
1888 Src = DAG.getRegister(ImmReg, MVT::i32);
1889 return true;
1890 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001891 default:
1892 return false;
1893 }
1894}
1895
1896
1897/// \brief Fold the instructions after selecting them
1898SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
1899 SelectionDAG &DAG) const {
1900 const R600InstrInfo *TII =
1901 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
1902 if (!Node->isMachineOpcode())
1903 return Node;
1904 unsigned Opcode = Node->getMachineOpcode();
1905 SDValue FakeOp;
1906
1907 std::vector<SDValue> Ops;
1908 for(SDNode::op_iterator I = Node->op_begin(), E = Node->op_end();
1909 I != E; ++I)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00001910 Ops.push_back(*I);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001911
1912 if (Opcode == AMDGPU::DOT_4) {
1913 int OperandIdx[] = {
1914 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1915 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1916 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1917 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
1918 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
1919 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
1920 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
1921 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00001922 };
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001923 int NegIdx[] = {
1924 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
1925 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
1926 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
1927 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
1928 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
1929 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
1930 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
1931 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
1932 };
1933 int AbsIdx[] = {
1934 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
1935 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
1936 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
1937 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
1938 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
1939 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
1940 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
1941 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
1942 };
1943 for (unsigned i = 0; i < 8; i++) {
1944 if (OperandIdx[i] < 0)
1945 return Node;
1946 SDValue &Src = Ops[OperandIdx[i] - 1];
1947 SDValue &Neg = Ops[NegIdx[i] - 1];
1948 SDValue &Abs = Ops[AbsIdx[i] - 1];
1949 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1950 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
1951 if (HasDst)
1952 SelIdx--;
1953 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001954 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
1955 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1956 }
1957 } else if (Opcode == AMDGPU::REG_SEQUENCE) {
1958 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
1959 SDValue &Src = Ops[i];
1960 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001961 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1962 }
Vincent Lejeune0167a312013-09-12 23:45:00 +00001963 } else if (Opcode == AMDGPU::CLAMP_R600) {
1964 SDValue Src = Node->getOperand(0);
1965 if (!Src.isMachineOpcode() ||
1966 !TII->hasInstrModifiers(Src.getMachineOpcode()))
1967 return Node;
1968 int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
1969 AMDGPU::OpName::clamp);
1970 if (ClampIdx < 0)
1971 return Node;
1972 std::vector<SDValue> Ops;
1973 unsigned NumOp = Src.getNumOperands();
1974 for(unsigned i = 0; i < NumOp; ++i)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00001975 Ops.push_back(Src.getOperand(i));
Vincent Lejeune0167a312013-09-12 23:45:00 +00001976 Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
1977 return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
1978 Node->getVTList(), Ops);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001979 } else {
1980 if (!TII->hasInstrModifiers(Opcode))
1981 return Node;
1982 int OperandIdx[] = {
1983 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
1984 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
1985 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
1986 };
1987 int NegIdx[] = {
1988 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
1989 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
1990 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
1991 };
1992 int AbsIdx[] = {
1993 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
1994 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
1995 -1
1996 };
1997 for (unsigned i = 0; i < 3; i++) {
1998 if (OperandIdx[i] < 0)
1999 return Node;
2000 SDValue &Src = Ops[OperandIdx[i] - 1];
2001 SDValue &Neg = Ops[NegIdx[i] - 1];
2002 SDValue FakeAbs;
2003 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2004 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2005 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002006 int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
2007 if (HasDst) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002008 SelIdx--;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002009 ImmIdx--;
2010 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002011 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002012 SDValue &Imm = Ops[ImmIdx];
2013 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002014 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2015 }
2016 }
2017
2018 return Node;
2019}