blob: beea54e14e9aa65984d933c1a465f00d9d75ef31 [file] [log] [blame]
//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000016#include "AMDILIntrinsicInfo.h"
17#include "AMDGPUFrameLowering.h"
18#include "AMDGPUSubtarget.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000019#include "R600Defines.h"
20#include "R600InstrInfo.h"
21#include "R600MachineFunctionInfo.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000022#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000023#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000024#include "llvm/CodeGen/MachineInstrBuilder.h"
25#include "llvm/CodeGen/MachineRegisterInfo.h"
26#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000027#include "llvm/IR/Argument.h"
28#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000029
30using namespace llvm;
31
32R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000033 AMDGPUTargetLowering(TM),
34 Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000035 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
36 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
37 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
38 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000039 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
40 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
41
Tom Stellard75aadc22012-12-11 21:25:42 +000042 computeRegisterProperties();
43
Tom Stellard0351ea22013-09-28 02:50:50 +000044 // Set condition code actions
45 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
46 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000047 setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000048 setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000049 setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
50 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000051 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
52 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
53 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
54 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000055 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
56 setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
57
58 setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
59 setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
60 setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
61 setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
62
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000063 setOperationAction(ISD::FCOS, MVT::f32, Custom);
64 setOperationAction(ISD::FSIN, MVT::f32, Custom);
65
Tom Stellard75aadc22012-12-11 21:25:42 +000066 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000067 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000068
Tom Stellard492ebea2013-03-08 15:37:07 +000069 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
70 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000071
72 setOperationAction(ISD::FSUB, MVT::f32, Expand);
73
74 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
75 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
76 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000077
Tom Stellard75aadc22012-12-11 21:25:42 +000078 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
79 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
80
Tom Stellarde8f9f282013-03-08 15:37:05 +000081 setOperationAction(ISD::SETCC, MVT::i32, Expand);
82 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000083 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
84
Tom Stellard53f2f902013-09-05 18:38:03 +000085 setOperationAction(ISD::SELECT, MVT::i32, Expand);
86 setOperationAction(ISD::SELECT, MVT::f32, Expand);
87 setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
Tom Stellard53f2f902013-09-05 18:38:03 +000088 setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000089
Matt Arsenault4e466652014-04-16 01:41:30 +000090 // Expand sign extension of vectors
91 if (!Subtarget->hasBFE())
92 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
93
94 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
95 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);
96
97 if (!Subtarget->hasBFE())
98 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
99 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
100 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);
101
102 if (!Subtarget->hasBFE())
103 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
104 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
105 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);
106
107 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
108 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
109 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);
110
111 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
112
113
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000114 // Legalize loads and stores to the private address space.
115 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000116 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000117 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000118
119 // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
120 // spaces, so it is custom lowered to handle those where it isn't.
Tom Stellard1e803092013-07-23 01:48:18 +0000121 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
122 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
123 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
124 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000125 setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
126 setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
127
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000128 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000129 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000130 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000131 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
Tom Stellardd3ee8c12013-08-16 01:12:06 +0000132 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
133 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000134
Tom Stellard365366f2013-01-23 02:09:06 +0000135 setOperationAction(ISD::LOAD, MVT::i32, Custom);
136 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000137 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
138
Tom Stellard880a80a2014-06-17 16:53:14 +0000139 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
140 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
141 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
142 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
143
144 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
145 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
146 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
147 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
148
Tom Stellard75aadc22012-12-11 21:25:42 +0000149 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +0000150 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +0000151 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +0000152 setTargetDAGCombine(ISD::SELECT_CC);
Quentin Colombete2e05482013-07-30 00:27:16 +0000153 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000154
Tom Stellard5f337882014-04-29 23:12:43 +0000155 // These should be replaced by UDVIREM, but it does not happen automatically
156 // during Type Legalization
157 setOperationAction(ISD::UDIV, MVT::i64, Custom);
158 setOperationAction(ISD::UREM, MVT::i64, Custom);
159
Michel Danzer49812b52013-07-10 16:37:07 +0000160 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
161
Tom Stellardb852af52013-03-08 15:37:03 +0000162 setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard87047f62013-04-24 23:56:18 +0000163 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
Tom Stellardfc455472013-08-12 22:33:21 +0000164 setSchedulingPreference(Sched::Source);
Tom Stellard75aadc22012-12-11 21:25:42 +0000165}
166
/// \brief Expand R600 pseudo-instructions marked usesCustomInserter.
///
/// Called by the scheduler after instruction selection.  Each handled pseudo
/// is replaced by one or more real machine instructions built before \p MI;
/// unless a case returns early, the original pseudo is erased at the end.
/// \returns the basic block containing the expansion (always \p BB here).
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
          static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // Replace LDS_*_RET instruction that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI->getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // The destination is still used: keep the _RET form untouched.
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()))
        return BB;

      // Rebuild as the NORET form, copying every operand except the dead
      // destination (operand 0).
      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      // Not an R600-specific pseudo: defer to the common AMDGPU inserter.
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;

  // CLAMP/FABS/FNEG pseudos all lower to a plain MOV with the corresponding
  // source-modifier flag bit set on the new instruction.
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                AMDGPU::MOV,
                                                MI->getOperand(0).getReg(),
                                                MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                AMDGPU::MOV,
                                                MI->getOperand(0).getReg(),
                                                MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                AMDGPU::MOV,
                                                MI->getOperand(0).getReg(),
                                                MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    // Set the MASK flag on the instruction that defines the masked register
    // rather than emitting anything for the MASK_WRITE pseudo itself.
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    // Materialize the FP immediate via its raw bit pattern.
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    // Copy from the constant file: a MOV from ALU_CONST with the constant
    // selector encoded in the src0_sel operand.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
        MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    // The write ends the program iff it is immediately followed by RETURN.
    unsigned EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    // Texture sample with explicit derivatives: load the H and V gradients
    // into two temporary 128-bit registers, then issue the gradient sample
    // which implicitly uses them.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    // Default source swizzle (x,y,z,w) and coordinate-type flags; both are
    // adjusted per texture target below.
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    // Same expansion as TXD above, but the final sample is the
    // shadow-comparison variant (TEX_SAMPLE_C_G).
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
      // Unconditional branch -> plain JUMP to the target block.
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
              .addOperand(MI->getOperand(0));
      break;

  case AMDGPU::BRANCH_COND_f32: {
    // Conditional branch on an f32 condition: PRED_X sets PREDICATE_BIT if
    // the value is non-zero, then JUMP_COND consumes (kills) the predicate.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    // Same as the f32 case but using the integer non-zero comparison.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
            AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO_INT)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
           .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if its not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    // Scan forward for a later export of the same type in this block.
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    // EOP (end-of-program) is set when the export is followed by RETURN.
    bool EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // CF instruction encoding differs between Evergreen (84) and R600 (40).
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    // RETURN is kept (annotated in place), so do not fall through to the
    // erase below.
    return BB;
  }
  }

  // Every case that reaches here has emitted a replacement, so delete the
  // original pseudo-instruction.
  MI->eraseFromParent();
  return BB;
}
543
544//===----------------------------------------------------------------------===//
545// Custom DAG Lowering Operations
546//===----------------------------------------------------------------------===//
547
Tom Stellard75aadc22012-12-11 21:25:42 +0000548SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000549 MachineFunction &MF = DAG.getMachineFunction();
550 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard75aadc22012-12-11 21:25:42 +0000551 switch (Op.getOpcode()) {
552 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Tom Stellard880a80a2014-06-17 16:53:14 +0000553 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
554 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000555 case ISD::FCOS:
556 case ISD::FSIN: return LowerTrig(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000557 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000558 case ISD::STORE: return LowerSTORE(Op, DAG);
Tom Stellard365366f2013-01-23 02:09:06 +0000559 case ISD::LOAD: return LowerLOAD(Op, DAG);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000560 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000561 case ISD::INTRINSIC_VOID: {
562 SDValue Chain = Op.getOperand(0);
563 unsigned IntrinsicID =
564 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
565 switch (IntrinsicID) {
566 case AMDGPUIntrinsic::AMDGPU_store_output: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000567 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
568 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000569 MFI->LiveOuts.push_back(Reg);
Andrew Trickef9de2a2013-05-25 02:42:55 +0000570 return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
Tom Stellard75aadc22012-12-11 21:25:42 +0000571 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000572 case AMDGPUIntrinsic::R600_store_swizzle: {
573 const SDValue Args[8] = {
574 Chain,
575 Op.getOperand(2), // Export Value
576 Op.getOperand(3), // ArrayBase
577 Op.getOperand(4), // Type
578 DAG.getConstant(0, MVT::i32), // SWZ_X
579 DAG.getConstant(1, MVT::i32), // SWZ_Y
580 DAG.getConstant(2, MVT::i32), // SWZ_Z
581 DAG.getConstant(3, MVT::i32) // SWZ_W
582 };
Craig Topper48d114b2014-04-26 18:35:24 +0000583 return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(), Args);
Tom Stellard75aadc22012-12-11 21:25:42 +0000584 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000585
Tom Stellard75aadc22012-12-11 21:25:42 +0000586 // default for switch(IntrinsicID)
587 default: break;
588 }
589 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
590 break;
591 }
592 case ISD::INTRINSIC_WO_CHAIN: {
593 unsigned IntrinsicID =
594 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
595 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000596 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000597 switch(IntrinsicID) {
598 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000599 case AMDGPUIntrinsic::R600_load_input: {
600 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
601 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
602 MachineFunction &MF = DAG.getMachineFunction();
603 MachineRegisterInfo &MRI = MF.getRegInfo();
604 MRI.addLiveIn(Reg);
605 return DAG.getCopyFromReg(DAG.getEntryNode(),
606 SDLoc(DAG.getEntryNode()), Reg, VT);
607 }
608
609 case AMDGPUIntrinsic::R600_interp_input: {
610 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
611 int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
612 MachineSDNode *interp;
613 if (ijb < 0) {
614 const MachineFunction &MF = DAG.getMachineFunction();
615 const R600InstrInfo *TII =
616 static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
617 interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
618 MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
619 return DAG.getTargetExtractSubreg(
620 TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
621 DL, MVT::f32, SDValue(interp, 0));
622 }
623 MachineFunction &MF = DAG.getMachineFunction();
624 MachineRegisterInfo &MRI = MF.getRegInfo();
625 unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
626 unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
627 MRI.addLiveIn(RegisterI);
628 MRI.addLiveIn(RegisterJ);
629 SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
630 SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
631 SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
632 SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);
633
634 if (slot % 4 < 2)
635 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
636 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
637 RegisterJNode, RegisterINode);
638 else
639 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
640 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
641 RegisterJNode, RegisterINode);
642 return SDValue(interp, slot % 2);
643 }
Vincent Lejeunef143af32013-11-11 22:10:24 +0000644 case AMDGPUIntrinsic::R600_interp_xy:
645 case AMDGPUIntrinsic::R600_interp_zw: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000646 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Tom Stellard41afe6a2013-02-05 17:09:14 +0000647 MachineSDNode *interp;
Vincent Lejeunef143af32013-11-11 22:10:24 +0000648 SDValue RegisterINode = Op.getOperand(2);
649 SDValue RegisterJNode = Op.getOperand(3);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000650
Vincent Lejeunef143af32013-11-11 22:10:24 +0000651 if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
Tom Stellard41afe6a2013-02-05 17:09:14 +0000652 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
Vincent Lejeunef143af32013-11-11 22:10:24 +0000653 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000654 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000655 else
656 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
Vincent Lejeunef143af32013-11-11 22:10:24 +0000657 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000658 RegisterJNode, RegisterINode);
Vincent Lejeunef143af32013-11-11 22:10:24 +0000659 return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
660 SDValue(interp, 0), SDValue(interp, 1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000661 }
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000662 case AMDGPUIntrinsic::R600_tex:
663 case AMDGPUIntrinsic::R600_texc:
664 case AMDGPUIntrinsic::R600_txl:
665 case AMDGPUIntrinsic::R600_txlc:
666 case AMDGPUIntrinsic::R600_txb:
667 case AMDGPUIntrinsic::R600_txbc:
668 case AMDGPUIntrinsic::R600_txf:
669 case AMDGPUIntrinsic::R600_txq:
670 case AMDGPUIntrinsic::R600_ddx:
Vincent Lejeune6df39432013-10-02 16:00:33 +0000671 case AMDGPUIntrinsic::R600_ddy:
672 case AMDGPUIntrinsic::R600_ldptr: {
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000673 unsigned TextureOp;
674 switch (IntrinsicID) {
675 case AMDGPUIntrinsic::R600_tex:
676 TextureOp = 0;
677 break;
678 case AMDGPUIntrinsic::R600_texc:
679 TextureOp = 1;
680 break;
681 case AMDGPUIntrinsic::R600_txl:
682 TextureOp = 2;
683 break;
684 case AMDGPUIntrinsic::R600_txlc:
685 TextureOp = 3;
686 break;
687 case AMDGPUIntrinsic::R600_txb:
688 TextureOp = 4;
689 break;
690 case AMDGPUIntrinsic::R600_txbc:
691 TextureOp = 5;
692 break;
693 case AMDGPUIntrinsic::R600_txf:
694 TextureOp = 6;
695 break;
696 case AMDGPUIntrinsic::R600_txq:
697 TextureOp = 7;
698 break;
699 case AMDGPUIntrinsic::R600_ddx:
700 TextureOp = 8;
701 break;
702 case AMDGPUIntrinsic::R600_ddy:
703 TextureOp = 9;
704 break;
Vincent Lejeune6df39432013-10-02 16:00:33 +0000705 case AMDGPUIntrinsic::R600_ldptr:
706 TextureOp = 10;
707 break;
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000708 default:
709 llvm_unreachable("Unknow Texture Operation");
710 }
711
712 SDValue TexArgs[19] = {
713 DAG.getConstant(TextureOp, MVT::i32),
714 Op.getOperand(1),
715 DAG.getConstant(0, MVT::i32),
716 DAG.getConstant(1, MVT::i32),
717 DAG.getConstant(2, MVT::i32),
718 DAG.getConstant(3, MVT::i32),
719 Op.getOperand(2),
720 Op.getOperand(3),
721 Op.getOperand(4),
722 DAG.getConstant(0, MVT::i32),
723 DAG.getConstant(1, MVT::i32),
724 DAG.getConstant(2, MVT::i32),
725 DAG.getConstant(3, MVT::i32),
726 Op.getOperand(5),
727 Op.getOperand(6),
728 Op.getOperand(7),
729 Op.getOperand(8),
730 Op.getOperand(9),
731 Op.getOperand(10)
732 };
Craig Topper48d114b2014-04-26 18:35:24 +0000733 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000734 }
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000735 case AMDGPUIntrinsic::AMDGPU_dp4: {
736 SDValue Args[8] = {
737 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
738 DAG.getConstant(0, MVT::i32)),
739 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
740 DAG.getConstant(0, MVT::i32)),
741 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
742 DAG.getConstant(1, MVT::i32)),
743 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
744 DAG.getConstant(1, MVT::i32)),
745 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
746 DAG.getConstant(2, MVT::i32)),
747 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
748 DAG.getConstant(2, MVT::i32)),
749 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
750 DAG.getConstant(3, MVT::i32)),
751 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
752 DAG.getConstant(3, MVT::i32))
753 };
Craig Topper48d114b2014-04-26 18:35:24 +0000754 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000755 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000756
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000757 case Intrinsic::r600_read_ngroups_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000758 return LowerImplicitParameter(DAG, VT, DL, 0);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000759 case Intrinsic::r600_read_ngroups_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000760 return LowerImplicitParameter(DAG, VT, DL, 1);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000761 case Intrinsic::r600_read_ngroups_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000762 return LowerImplicitParameter(DAG, VT, DL, 2);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000763 case Intrinsic::r600_read_global_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000764 return LowerImplicitParameter(DAG, VT, DL, 3);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000765 case Intrinsic::r600_read_global_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000766 return LowerImplicitParameter(DAG, VT, DL, 4);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000767 case Intrinsic::r600_read_global_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000768 return LowerImplicitParameter(DAG, VT, DL, 5);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000769 case Intrinsic::r600_read_local_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000770 return LowerImplicitParameter(DAG, VT, DL, 6);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000771 case Intrinsic::r600_read_local_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000772 return LowerImplicitParameter(DAG, VT, DL, 7);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000773 case Intrinsic::r600_read_local_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000774 return LowerImplicitParameter(DAG, VT, DL, 8);
775
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000776 case Intrinsic::r600_read_tgid_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000777 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
778 AMDGPU::T1_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000779 case Intrinsic::r600_read_tgid_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000780 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
781 AMDGPU::T1_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000782 case Intrinsic::r600_read_tgid_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000783 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
784 AMDGPU::T1_Z, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000785 case Intrinsic::r600_read_tidig_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000786 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
787 AMDGPU::T0_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000788 case Intrinsic::r600_read_tidig_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000789 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
790 AMDGPU::T0_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000791 case Intrinsic::r600_read_tidig_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000792 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
793 AMDGPU::T0_Z, VT);
794 }
795 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
796 break;
797 }
798 } // end switch(Op.getOpcode())
799 return SDValue();
800}
801
802void R600TargetLowering::ReplaceNodeResults(SDNode *N,
803 SmallVectorImpl<SDValue> &Results,
804 SelectionDAG &DAG) const {
805 switch (N->getOpcode()) {
Matt Arsenaultd125d742014-03-27 17:23:24 +0000806 default:
807 AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
808 return;
Tom Stellard75aadc22012-12-11 21:25:42 +0000809 case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
Tom Stellard365366f2013-01-23 02:09:06 +0000810 return;
811 case ISD::LOAD: {
812 SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
813 Results.push_back(SDValue(Node, 0));
814 Results.push_back(SDValue(Node, 1));
815 // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
816 // function
817 DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
818 return;
819 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000820 case ISD::STORE:
821 SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
822 Results.push_back(SDValue(Node, 0));
823 return;
Tom Stellard75aadc22012-12-11 21:25:42 +0000824 }
825}
826
Tom Stellard880a80a2014-06-17 16:53:14 +0000827SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
828 SDValue Vector) const {
829
830 SDLoc DL(Vector);
831 EVT VecVT = Vector.getValueType();
832 EVT EltVT = VecVT.getVectorElementType();
833 SmallVector<SDValue, 8> Args;
834
835 for (unsigned i = 0, e = VecVT.getVectorNumElements();
836 i != e; ++i) {
837 Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
838 Vector, DAG.getConstant(i, getVectorIdxTy())));
839 }
840
841 return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
842}
843
844SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
845 SelectionDAG &DAG) const {
846
847 SDLoc DL(Op);
848 SDValue Vector = Op.getOperand(0);
849 SDValue Index = Op.getOperand(1);
850
851 if (isa<ConstantSDNode>(Index) ||
852 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
853 return Op;
854
855 Vector = vectorToVerticalVector(DAG, Vector);
856 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
857 Vector, Index);
858}
859
860SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
861 SelectionDAG &DAG) const {
862 SDLoc DL(Op);
863 SDValue Vector = Op.getOperand(0);
864 SDValue Value = Op.getOperand(1);
865 SDValue Index = Op.getOperand(2);
866
867 if (isa<ConstantSDNode>(Index) ||
868 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
869 return Op;
870
871 Vector = vectorToVerticalVector(DAG, Vector);
872 SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
873 Vector, Value, Index);
874 return vectorToVerticalVector(DAG, Insert);
875}
876
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000877SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
878 // On hw >= R700, COS/SIN input must be between -1. and 1.
879 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
880 EVT VT = Op.getValueType();
881 SDValue Arg = Op.getOperand(0);
882 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
883 DAG.getNode(ISD::FADD, SDLoc(Op), VT,
884 DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
885 DAG.getConstantFP(0.15915494309, MVT::f32)),
886 DAG.getConstantFP(0.5, MVT::f32)));
887 unsigned TrigNode;
888 switch (Op.getOpcode()) {
889 case ISD::FCOS:
890 TrigNode = AMDGPUISD::COS_HW;
891 break;
892 case ISD::FSIN:
893 TrigNode = AMDGPUISD::SIN_HW;
894 break;
895 default:
896 llvm_unreachable("Wrong trig opcode");
897 }
898 SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
899 DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
900 DAG.getConstantFP(-0.5, MVT::f32)));
901 if (Gen >= AMDGPUSubtarget::R700)
902 return TrigVal;
903 // On R600 hw, COS/SIN input must be between -Pi and Pi.
904 return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
905 DAG.getConstantFP(3.14159265359, MVT::f32));
906}
907
Tom Stellard75aadc22012-12-11 21:25:42 +0000908SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
909 return DAG.getNode(
910 ISD::SETCC,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000911 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +0000912 MVT::i1,
913 Op, DAG.getConstantFP(0.0f, MVT::f32),
914 DAG.getCondCode(ISD::SETNE)
915 );
916}
917
Tom Stellard75aadc22012-12-11 21:25:42 +0000918SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000919 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +0000920 unsigned DwordOffset) const {
921 unsigned ByteOffset = DwordOffset * 4;
922 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +0000923 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +0000924
925 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
926 assert(isInt<16>(ByteOffset));
927
928 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
929 DAG.getConstant(ByteOffset, MVT::i32), // PTR
930 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
931 false, false, false, 0);
932}
933
Tom Stellard75aadc22012-12-11 21:25:42 +0000934bool R600TargetLowering::isZero(SDValue Op) const {
935 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
936 return Cst->isNullValue();
937 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
938 return CstFP->isZero();
939 } else {
940 return false;
941 }
942}
943
/// Custom lowering for SELECT_CC.
///
/// Tries, in order:
///  1. A native SET* instruction (hardware true/false result values),
///     inverting and/or swapping the condition so it becomes legal.
///  2. A native CND* instruction (comparison against zero), moving a zero
///     operand to the RHS when needed.
///  3. Otherwise, expands into a combination of two supported SELECT_CC
///     nodes.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
    ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  // If the operands are swapped hardware true/false values, first try the
  // plain inverse condition, then the inverse with swapped compare operands.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        // Inverting the condition requires swapping True/False as well.
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types.  This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    // The hardware has no native "not equal" CND*, so rewrite the NE family
    // as the inverse condition with True/False exchanged.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
1078
Alp Tokercb402912014-01-24 17:20:08 +00001079/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001080/// convert these pointers to a register index. Each register holds
1081/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
1082/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
1083/// for indirect addressing.
1084SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1085 unsigned StackWidth,
1086 SelectionDAG &DAG) const {
1087 unsigned SRLPad;
1088 switch(StackWidth) {
1089 case 1:
1090 SRLPad = 2;
1091 break;
1092 case 2:
1093 SRLPad = 3;
1094 break;
1095 case 4:
1096 SRLPad = 4;
1097 break;
1098 default: llvm_unreachable("Invalid stack width");
1099 }
1100
Andrew Trickef9de2a2013-05-25 02:42:55 +00001101 return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001102 DAG.getConstant(SRLPad, MVT::i32));
1103}
1104
1105void R600TargetLowering::getStackAddress(unsigned StackWidth,
1106 unsigned ElemIdx,
1107 unsigned &Channel,
1108 unsigned &PtrIncr) const {
1109 switch (StackWidth) {
1110 default:
1111 case 1:
1112 Channel = 0;
1113 if (ElemIdx > 0) {
1114 PtrIncr = 1;
1115 } else {
1116 PtrIncr = 0;
1117 }
1118 break;
1119 case 2:
1120 Channel = ElemIdx % 2;
1121 if (ElemIdx == 2) {
1122 PtrIncr = 1;
1123 } else {
1124 PtrIncr = 0;
1125 }
1126 break;
1127 case 4:
1128 Channel = ElemIdx;
1129 PtrIncr = 0;
1130 break;
1131 }
1132}
1133
Tom Stellard75aadc22012-12-11 21:25:42 +00001134SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001135 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001136 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1137 SDValue Chain = Op.getOperand(0);
1138 SDValue Value = Op.getOperand(1);
1139 SDValue Ptr = Op.getOperand(2);
1140
Tom Stellard2ffc3302013-08-26 15:05:44 +00001141 SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
Tom Stellardfbab8272013-08-16 01:12:11 +00001142 if (Result.getNode()) {
1143 return Result;
1144 }
1145
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001146 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
1147 if (StoreNode->isTruncatingStore()) {
1148 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001149 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001150 EVT MemVT = StoreNode->getMemoryVT();
1151 SDValue MaskConstant;
1152 if (MemVT == MVT::i8) {
1153 MaskConstant = DAG.getConstant(0xFF, MVT::i32);
1154 } else {
1155 assert(MemVT == MVT::i16);
1156 MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
1157 }
1158 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
1159 DAG.getConstant(2, MVT::i32));
1160 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
1161 DAG.getConstant(0x00000003, VT));
1162 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1163 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
1164 DAG.getConstant(3, VT));
1165 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1166 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
1167 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1168 // vector instead.
1169 SDValue Src[4] = {
1170 ShiftedValue,
1171 DAG.getConstant(0, MVT::i32),
1172 DAG.getConstant(0, MVT::i32),
1173 Mask
1174 };
Craig Topper48d114b2014-04-26 18:35:24 +00001175 SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001176 SDValue Args[3] = { Chain, Input, DWordAddr };
1177 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
Craig Topper206fcd42014-04-26 19:29:41 +00001178 Op->getVTList(), Args, MemVT,
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001179 StoreNode->getMemOperand());
1180 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
1181 Value.getValueType().bitsGE(MVT::i32)) {
1182 // Convert pointer from byte address to dword address.
1183 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1184 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
1185 Ptr, DAG.getConstant(2, MVT::i32)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001186
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001187 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
Matt Arsenaulteaa3a7e2013-12-10 21:37:42 +00001188 llvm_unreachable("Truncated and indexed stores not supported yet");
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001189 } else {
1190 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1191 }
1192 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001193 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001194 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001195
1196 EVT ValueVT = Value.getValueType();
1197
1198 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1199 return SDValue();
1200 }
1201
Tom Stellarde9373602014-01-22 19:24:14 +00001202 SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
1203 if (Ret.getNode()) {
1204 return Ret;
1205 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001206 // Lowering for indirect addressing
1207
1208 const MachineFunction &MF = DAG.getMachineFunction();
1209 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1210 getTargetMachine().getFrameLowering());
1211 unsigned StackWidth = TFL->getStackWidth(MF);
1212
1213 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1214
1215 if (ValueVT.isVector()) {
1216 unsigned NumElemVT = ValueVT.getVectorNumElements();
1217 EVT ElemVT = ValueVT.getVectorElementType();
Craig Topper48d114b2014-04-26 18:35:24 +00001218 SmallVector<SDValue, 4> Stores(NumElemVT);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001219
1220 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1221 "vector width in load");
1222
1223 for (unsigned i = 0; i < NumElemVT; ++i) {
1224 unsigned Channel, PtrIncr;
1225 getStackAddress(StackWidth, i, Channel, PtrIncr);
1226 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1227 DAG.getConstant(PtrIncr, MVT::i32));
1228 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
1229 Value, DAG.getConstant(i, MVT::i32));
1230
1231 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1232 Chain, Elem, Ptr,
1233 DAG.getTargetConstant(Channel, MVT::i32));
1234 }
Craig Topper48d114b2014-04-26 18:35:24 +00001235 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001236 } else {
1237 if (ValueVT == MVT::i8) {
1238 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1239 }
1240 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
NAKAMURA Takumi18ca09c2013-05-22 06:37:25 +00001241 DAG.getTargetConstant(0, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001242 }
1243
1244 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001245}
1246
Tom Stellard365366f2013-01-23 02:09:06 +00001247// return (512 + (kc_bank << 12)
1248static int
1249ConstantAddressBlock(unsigned AddressSpace) {
1250 switch (AddressSpace) {
1251 case AMDGPUAS::CONSTANT_BUFFER_0:
1252 return 512;
1253 case AMDGPUAS::CONSTANT_BUFFER_1:
1254 return 512 + 4096;
1255 case AMDGPUAS::CONSTANT_BUFFER_2:
1256 return 512 + 4096 * 2;
1257 case AMDGPUAS::CONSTANT_BUFFER_3:
1258 return 512 + 4096 * 3;
1259 case AMDGPUAS::CONSTANT_BUFFER_4:
1260 return 512 + 4096 * 4;
1261 case AMDGPUAS::CONSTANT_BUFFER_5:
1262 return 512 + 4096 * 5;
1263 case AMDGPUAS::CONSTANT_BUFFER_6:
1264 return 512 + 4096 * 6;
1265 case AMDGPUAS::CONSTANT_BUFFER_7:
1266 return 512 + 4096 * 7;
1267 case AMDGPUAS::CONSTANT_BUFFER_8:
1268 return 512 + 4096 * 8;
1269 case AMDGPUAS::CONSTANT_BUFFER_9:
1270 return 512 + 4096 * 9;
1271 case AMDGPUAS::CONSTANT_BUFFER_10:
1272 return 512 + 4096 * 10;
1273 case AMDGPUAS::CONSTANT_BUFFER_11:
1274 return 512 + 4096 * 11;
1275 case AMDGPUAS::CONSTANT_BUFFER_12:
1276 return 512 + 4096 * 12;
1277 case AMDGPUAS::CONSTANT_BUFFER_13:
1278 return 512 + 4096 * 13;
1279 case AMDGPUAS::CONSTANT_BUFFER_14:
1280 return 512 + 4096 * 14;
1281 case AMDGPUAS::CONSTANT_BUFFER_15:
1282 return 512 + 4096 * 15;
1283 default:
1284 return -1;
1285 }
1286}
1287
/// Custom lowering for LOAD nodes.
///
/// Handles, in order:
///  1. Anything the generic AMDGPU lowering can already produce.
///  2. Vector loads from local memory, split via SplitVectorLoad.
///  3. Constant-buffer loads, folded to CONST_ADDRESS nodes (with a
///     compile-time address when the pointer is constant).
///  4. Sign-extending loads, expanded to EXTLOAD + SHL + SRA.
///  5. Private (stack) loads, lowered to per-channel REGISTER_LOAD nodes.
/// Returns SDValue() for address spaces not handled here.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  // Let the generic AMDGPU lowering try first; pair its result with the
  // original chain, since LOAD produces two values.
  SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
  if (Ret.getNode()) {
    SDValue Ops[2] = {
      Ret,
      Chain
    };
    return DAG.getMergeValues(Ops, DL);
  }


  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      SplitVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Constant-buffer loads. Only non-extending and zero-extending loads can
  // be folded; sign-extending ones fall through to the SEXTLOAD expansion.
  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
        isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      // Scalar results still go through a vector here; the scalar value is
      // extracted from element 0 below.
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT,
                           makeArrayRef(Slots, NumElements));
    } else {
      // A non-constant pointer can't be folded; keep it as a v4i32 load.
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    // Sign-extend by shifting the loaded value all the way left, then
    // arithmetic-shifting it back down.
    SDValue ShiftAmount =
        DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                  LoadNode->getPointerInfo(), MemVT,
                                  LoadNode->isVolatile(),
                                  LoadNode->isNonTemporal(),
                                  LoadNode->getAlignment());
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);

    SDValue MergedValues[2] = { Sra, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    // Emit one REGISTER_LOAD per element, advancing the register pointer as
    // dictated by the stack width.
    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, MVT::i32),
                             Op.getOperand(2));
    }
    // Pad the result out to a 4-element vector with undefs.
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2] = {
    LoweredLoad,
    Chain
  };

  return DAG.getMergeValues(Ops, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001436
Tom Stellard75aadc22012-12-11 21:25:42 +00001437/// XXX Only kernel functions are supported, so we can assume for now that
1438/// every function is a kernel function, but in the future we should use
1439/// separate calling conventions for kernel and non-kernel functions.
1440SDValue R600TargetLowering::LowerFormalArguments(
1441 SDValue Chain,
1442 CallingConv::ID CallConv,
1443 bool isVarArg,
1444 const SmallVectorImpl<ISD::InputArg> &Ins,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001445 SDLoc DL, SelectionDAG &DAG,
Tom Stellard75aadc22012-12-11 21:25:42 +00001446 SmallVectorImpl<SDValue> &InVals) const {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001447 SmallVector<CCValAssign, 16> ArgLocs;
1448 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
1449 getTargetMachine(), ArgLocs, *DAG.getContext());
Vincent Lejeunef143af32013-11-11 22:10:24 +00001450 MachineFunction &MF = DAG.getMachineFunction();
1451 unsigned ShaderType = MF.getInfo<R600MachineFunctionInfo>()->ShaderType;
Tom Stellardacfeebf2013-07-23 01:48:05 +00001452
Tom Stellardaf775432013-10-23 00:44:32 +00001453 SmallVector<ISD::InputArg, 8> LocalIns;
1454
Matt Arsenault209a7b92014-04-18 07:40:20 +00001455 getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);
Tom Stellardaf775432013-10-23 00:44:32 +00001456
1457 AnalyzeFormalArguments(CCInfo, LocalIns);
Tom Stellardacfeebf2013-07-23 01:48:05 +00001458
Tom Stellard1e803092013-07-23 01:48:18 +00001459 for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001460 CCValAssign &VA = ArgLocs[i];
Tom Stellardaf775432013-10-23 00:44:32 +00001461 EVT VT = Ins[i].VT;
1462 EVT MemVT = LocalIns[i].VT;
Tom Stellard78e01292013-07-23 01:47:58 +00001463
Vincent Lejeunef143af32013-11-11 22:10:24 +00001464 if (ShaderType != ShaderType::COMPUTE) {
1465 unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
1466 SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
1467 InVals.push_back(Register);
1468 continue;
1469 }
1470
Tom Stellard75aadc22012-12-11 21:25:42 +00001471 PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +00001472 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellardacfeebf2013-07-23 01:48:05 +00001473
Matt Arsenaultfae02982014-03-17 18:58:11 +00001474 // i64 isn't a legal type, so the register type used ends up as i32, which
1475 // isn't expected here. It attempts to create this sextload, but it ends up
1476 // being invalid. Somehow this seems to work with i64 arguments, but breaks
1477 // for <1 x i64>.
1478
Tom Stellardacfeebf2013-07-23 01:48:05 +00001479 // The first 36 bytes of the input buffer contains information about
1480 // thread group and global sizes.
Matt Arsenaulte1f030c2014-04-11 20:59:54 +00001481
1482 // FIXME: This should really check the extload type, but the handling of
1483 // extload vecto parameters seems to be broken.
1484 //ISD::LoadExtType Ext = Ins[i].Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
1485 ISD::LoadExtType Ext = ISD::SEXTLOAD;
1486 SDValue Arg = DAG.getExtLoad(Ext, DL, VT, Chain,
Tom Stellardaf775432013-10-23 00:44:32 +00001487 DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
1488 MachinePointerInfo(UndefValue::get(PtrTy)),
1489 MemVT, false, false, 4);
Matt Arsenault209a7b92014-04-18 07:40:20 +00001490
1491 // 4 is the preferred alignment for the CONSTANT memory space.
Tom Stellard75aadc22012-12-11 21:25:42 +00001492 InVals.push_back(Arg);
Tom Stellard75aadc22012-12-11 21:25:42 +00001493 }
1494 return Chain;
1495}
1496
Matt Arsenault758659232013-05-18 00:21:46 +00001497EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001498 if (!VT.isVector())
1499 return MVT::i32;
Tom Stellard75aadc22012-12-11 21:25:42 +00001500 return VT.changeVectorElementTypeToInteger();
1501}
1502
Matt Arsenault209a7b92014-04-18 07:40:20 +00001503static SDValue CompactSwizzlableVector(
1504 SelectionDAG &DAG, SDValue VectorEntry,
1505 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001506 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1507 assert(RemapSwizzle.empty());
1508 SDValue NewBldVec[4] = {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001509 VectorEntry.getOperand(0),
1510 VectorEntry.getOperand(1),
1511 VectorEntry.getOperand(2),
1512 VectorEntry.getOperand(3)
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001513 };
1514
1515 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeunefa58a5f2013-10-13 17:56:10 +00001516 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1517 // We mask write here to teach later passes that the ith element of this
1518 // vector is undef. Thus we can use it to reduce 128 bits reg usage,
1519 // break false dependencies and additionnaly make assembly easier to read.
1520 RemapSwizzle[i] = 7; // SEL_MASK_WRITE
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001521 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1522 if (C->isZero()) {
1523 RemapSwizzle[i] = 4; // SEL_0
1524 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1525 } else if (C->isExactlyValue(1.0)) {
1526 RemapSwizzle[i] = 5; // SEL_1
1527 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1528 }
1529 }
1530
1531 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1532 continue;
1533 for (unsigned j = 0; j < i; j++) {
1534 if (NewBldVec[i] == NewBldVec[j]) {
1535 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1536 RemapSwizzle[i] = j;
1537 break;
1538 }
1539 }
1540 }
1541
1542 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001543 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001544}
1545
Benjamin Kramer193960c2013-06-11 13:32:25 +00001546static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1547 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001548 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1549 assert(RemapSwizzle.empty());
1550 SDValue NewBldVec[4] = {
1551 VectorEntry.getOperand(0),
1552 VectorEntry.getOperand(1),
1553 VectorEntry.getOperand(2),
1554 VectorEntry.getOperand(3)
1555 };
1556 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001557 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001558 RemapSwizzle[i] = i;
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001559 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1560 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1561 ->getZExtValue();
1562 if (i == Idx)
1563 isUnmovable[Idx] = true;
1564 }
1565 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001566
1567 for (unsigned i = 0; i < 4; i++) {
1568 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1569 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1570 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001571 if (isUnmovable[Idx])
1572 continue;
1573 // Swap i and Idx
1574 std::swap(NewBldVec[Idx], NewBldVec[i]);
1575 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1576 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001577 }
1578 }
1579
1580 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001581 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001582}
1583
1584
1585SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1586SDValue Swz[4], SelectionDAG &DAG) const {
1587 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1588 // Old -> New swizzle values
1589 DenseMap<unsigned, unsigned> SwizzleRemap;
1590
1591 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1592 for (unsigned i = 0; i < 4; i++) {
1593 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1594 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1595 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1596 }
1597
1598 SwizzleRemap.clear();
1599 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1600 for (unsigned i = 0; i < 4; i++) {
1601 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1602 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1603 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1604 }
1605
1606 return BuildVector;
1607}
1608
1609
Tom Stellard75aadc22012-12-11 21:25:42 +00001610//===----------------------------------------------------------------------===//
1611// Custom DAG Optimizations
1612//===----------------------------------------------------------------------===//
1613
1614SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1615 DAGCombinerInfo &DCI) const {
1616 SelectionDAG &DAG = DCI.DAG;
1617
1618 switch (N->getOpcode()) {
Tom Stellard50122a52014-04-07 19:45:41 +00001619 default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00001620 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1621 case ISD::FP_ROUND: {
1622 SDValue Arg = N->getOperand(0);
1623 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001624 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001625 Arg.getOperand(0));
1626 }
1627 break;
1628 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001629
1630 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1631 // (i32 select_cc f32, f32, -1, 0 cc)
1632 //
1633 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1634 // this to one of the SET*_DX10 instructions.
1635 case ISD::FP_TO_SINT: {
1636 SDValue FNeg = N->getOperand(0);
1637 if (FNeg.getOpcode() != ISD::FNEG) {
1638 return SDValue();
1639 }
1640 SDValue SelectCC = FNeg.getOperand(0);
1641 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1642 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1643 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1644 !isHWTrueValue(SelectCC.getOperand(2)) ||
1645 !isHWFalseValue(SelectCC.getOperand(3))) {
1646 return SDValue();
1647 }
1648
Andrew Trickef9de2a2013-05-25 02:42:55 +00001649 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001650 SelectCC.getOperand(0), // LHS
1651 SelectCC.getOperand(1), // RHS
1652 DAG.getConstant(-1, MVT::i32), // True
1653 DAG.getConstant(0, MVT::i32), // Flase
1654 SelectCC.getOperand(4)); // CC
1655
1656 break;
1657 }
Quentin Colombete2e05482013-07-30 00:27:16 +00001658
NAKAMURA Takumi8a046432013-10-28 04:07:38 +00001659 // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1660 // => build_vector elt0, ... , NewEltIdx, ... , eltN
Quentin Colombete2e05482013-07-30 00:27:16 +00001661 case ISD::INSERT_VECTOR_ELT: {
1662 SDValue InVec = N->getOperand(0);
1663 SDValue InVal = N->getOperand(1);
1664 SDValue EltNo = N->getOperand(2);
1665 SDLoc dl(N);
1666
1667 // If the inserted element is an UNDEF, just use the input vector.
1668 if (InVal.getOpcode() == ISD::UNDEF)
1669 return InVec;
1670
1671 EVT VT = InVec.getValueType();
1672
1673 // If we can't generate a legal BUILD_VECTOR, exit
1674 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1675 return SDValue();
1676
1677 // Check that we know which element is being inserted
1678 if (!isa<ConstantSDNode>(EltNo))
1679 return SDValue();
1680 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1681
1682 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1683 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1684 // vector elements.
1685 SmallVector<SDValue, 8> Ops;
1686 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1687 Ops.append(InVec.getNode()->op_begin(),
1688 InVec.getNode()->op_end());
1689 } else if (InVec.getOpcode() == ISD::UNDEF) {
1690 unsigned NElts = VT.getVectorNumElements();
1691 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1692 } else {
1693 return SDValue();
1694 }
1695
1696 // Insert the element
1697 if (Elt < Ops.size()) {
1698 // All the operands of BUILD_VECTOR must have the same type;
1699 // we enforce that here.
1700 EVT OpVT = Ops[0].getValueType();
1701 if (InVal.getValueType() != OpVT)
1702 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1703 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
1704 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
1705 Ops[Elt] = InVal;
1706 }
1707
1708 // Return the new vector
Craig Topper48d114b2014-04-26 18:35:24 +00001709 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
Quentin Colombete2e05482013-07-30 00:27:16 +00001710 }
1711
Tom Stellard365366f2013-01-23 02:09:06 +00001712 // Extract_vec (Build_vector) generated by custom lowering
1713 // also needs to be customly combined
1714 case ISD::EXTRACT_VECTOR_ELT: {
1715 SDValue Arg = N->getOperand(0);
1716 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1717 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1718 unsigned Element = Const->getZExtValue();
1719 return Arg->getOperand(Element);
1720 }
1721 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001722 if (Arg.getOpcode() == ISD::BITCAST &&
1723 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1724 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1725 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001726 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00001727 Arg->getOperand(0).getOperand(Element));
1728 }
1729 }
Tom Stellard365366f2013-01-23 02:09:06 +00001730 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001731
1732 case ISD::SELECT_CC: {
Tom Stellardafa8b532014-05-09 16:42:16 +00001733 // Try common optimizations
1734 SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
1735 if (Ret.getNode())
1736 return Ret;
1737
Tom Stellarde06163a2013-02-07 14:02:35 +00001738 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1739 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001740 //
1741 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1742 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001743 SDValue LHS = N->getOperand(0);
1744 if (LHS.getOpcode() != ISD::SELECT_CC) {
1745 return SDValue();
1746 }
1747
1748 SDValue RHS = N->getOperand(1);
1749 SDValue True = N->getOperand(2);
1750 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001751 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001752
1753 if (LHS.getOperand(2).getNode() != True.getNode() ||
1754 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00001755 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00001756 return SDValue();
1757 }
1758
Tom Stellard5e524892013-03-08 15:37:11 +00001759 switch (NCC) {
1760 default: return SDValue();
1761 case ISD::SETNE: return LHS;
1762 case ISD::SETEQ: {
1763 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1764 LHSCC = ISD::getSetCCInverse(LHSCC,
1765 LHS.getOperand(0).getValueType().isInteger());
Tom Stellardcd428182013-09-28 02:50:38 +00001766 if (DCI.isBeforeLegalizeOps() ||
1767 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1768 return DAG.getSelectCC(SDLoc(N),
1769 LHS.getOperand(0),
1770 LHS.getOperand(1),
1771 LHS.getOperand(2),
1772 LHS.getOperand(3),
1773 LHSCC);
1774 break;
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001775 }
Tom Stellard5e524892013-03-08 15:37:11 +00001776 }
Tom Stellardcd428182013-09-28 02:50:38 +00001777 return SDValue();
Tom Stellard5e524892013-03-08 15:37:11 +00001778 }
Tom Stellardfbab8272013-08-16 01:12:11 +00001779
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001780 case AMDGPUISD::EXPORT: {
1781 SDValue Arg = N->getOperand(1);
1782 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1783 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001784
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001785 SDValue NewArgs[8] = {
1786 N->getOperand(0), // Chain
1787 SDValue(),
1788 N->getOperand(2), // ArrayBase
1789 N->getOperand(3), // Type
1790 N->getOperand(4), // SWZ_X
1791 N->getOperand(5), // SWZ_Y
1792 N->getOperand(6), // SWZ_Z
1793 N->getOperand(7) // SWZ_W
1794 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00001795 SDLoc DL(N);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001796 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
Craig Topper48d114b2014-04-26 18:35:24 +00001797 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
Tom Stellarde06163a2013-02-07 14:02:35 +00001798 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001799 case AMDGPUISD::TEXTURE_FETCH: {
1800 SDValue Arg = N->getOperand(1);
1801 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1802 break;
1803
1804 SDValue NewArgs[19] = {
1805 N->getOperand(0),
1806 N->getOperand(1),
1807 N->getOperand(2),
1808 N->getOperand(3),
1809 N->getOperand(4),
1810 N->getOperand(5),
1811 N->getOperand(6),
1812 N->getOperand(7),
1813 N->getOperand(8),
1814 N->getOperand(9),
1815 N->getOperand(10),
1816 N->getOperand(11),
1817 N->getOperand(12),
1818 N->getOperand(13),
1819 N->getOperand(14),
1820 N->getOperand(15),
1821 N->getOperand(16),
1822 N->getOperand(17),
1823 N->getOperand(18),
1824 };
1825 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
1826 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
Craig Topper48d114b2014-04-26 18:35:24 +00001827 NewArgs);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001828 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001829 }
Matt Arsenault5565f65e2014-05-22 18:09:07 +00001830
1831 return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00001832}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001833
1834static bool
1835FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001836 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001837 const R600InstrInfo *TII =
1838 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
1839 if (!Src.isMachineOpcode())
1840 return false;
1841 switch (Src.getMachineOpcode()) {
1842 case AMDGPU::FNEG_R600:
1843 if (!Neg.getNode())
1844 return false;
1845 Src = Src.getOperand(0);
1846 Neg = DAG.getTargetConstant(1, MVT::i32);
1847 return true;
1848 case AMDGPU::FABS_R600:
1849 if (!Abs.getNode())
1850 return false;
1851 Src = Src.getOperand(0);
1852 Abs = DAG.getTargetConstant(1, MVT::i32);
1853 return true;
1854 case AMDGPU::CONST_COPY: {
1855 unsigned Opcode = ParentNode->getMachineOpcode();
1856 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1857
1858 if (!Sel.getNode())
1859 return false;
1860
1861 SDValue CstOffset = Src.getOperand(0);
1862 if (ParentNode->getValueType(0).isVector())
1863 return false;
1864
1865 // Gather constants values
1866 int SrcIndices[] = {
1867 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
1868 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
1869 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
1870 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1871 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1872 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1873 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
1874 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
1875 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
1876 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
1877 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
1878 };
1879 std::vector<unsigned> Consts;
Matt Arsenault4d64f962014-05-12 19:23:21 +00001880 for (int OtherSrcIdx : SrcIndices) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001881 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
1882 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
1883 continue;
1884 if (HasDst) {
1885 OtherSrcIdx--;
1886 OtherSelIdx--;
1887 }
1888 if (RegisterSDNode *Reg =
1889 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
1890 if (Reg->getReg() == AMDGPU::ALU_CONST) {
Matt Arsenaultb3ee3882014-05-12 19:26:38 +00001891 ConstantSDNode *Cst
1892 = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001893 Consts.push_back(Cst->getZExtValue());
1894 }
1895 }
1896 }
1897
Matt Arsenault37c12d72014-05-12 20:42:57 +00001898 ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001899 Consts.push_back(Cst->getZExtValue());
1900 if (!TII->fitsConstReadLimitations(Consts)) {
1901 return false;
1902 }
1903
1904 Sel = CstOffset;
1905 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
1906 return true;
1907 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001908 case AMDGPU::MOV_IMM_I32:
1909 case AMDGPU::MOV_IMM_F32: {
1910 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
1911 uint64_t ImmValue = 0;
1912
1913
1914 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
1915 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
1916 float FloatValue = FPC->getValueAPF().convertToFloat();
1917 if (FloatValue == 0.0) {
1918 ImmReg = AMDGPU::ZERO;
1919 } else if (FloatValue == 0.5) {
1920 ImmReg = AMDGPU::HALF;
1921 } else if (FloatValue == 1.0) {
1922 ImmReg = AMDGPU::ONE;
1923 } else {
1924 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
1925 }
1926 } else {
1927 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
1928 uint64_t Value = C->getZExtValue();
1929 if (Value == 0) {
1930 ImmReg = AMDGPU::ZERO;
1931 } else if (Value == 1) {
1932 ImmReg = AMDGPU::ONE_INT;
1933 } else {
1934 ImmValue = Value;
1935 }
1936 }
1937
1938 // Check that we aren't already using an immediate.
1939 // XXX: It's possible for an instruction to have more than one
1940 // immediate operand, but this is not supported yet.
1941 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
1942 if (!Imm.getNode())
1943 return false;
1944 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
1945 assert(C);
1946 if (C->getZExtValue())
1947 return false;
1948 Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
1949 }
1950 Src = DAG.getRegister(ImmReg, MVT::i32);
1951 return true;
1952 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001953 default:
1954 return false;
1955 }
1956}
1957
1958
1959/// \brief Fold the instructions after selecting them
1960SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
1961 SelectionDAG &DAG) const {
1962 const R600InstrInfo *TII =
1963 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
1964 if (!Node->isMachineOpcode())
1965 return Node;
1966 unsigned Opcode = Node->getMachineOpcode();
1967 SDValue FakeOp;
1968
1969 std::vector<SDValue> Ops;
1970 for(SDNode::op_iterator I = Node->op_begin(), E = Node->op_end();
1971 I != E; ++I)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00001972 Ops.push_back(*I);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001973
1974 if (Opcode == AMDGPU::DOT_4) {
1975 int OperandIdx[] = {
1976 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1977 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1978 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1979 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
1980 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
1981 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
1982 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
1983 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00001984 };
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001985 int NegIdx[] = {
1986 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
1987 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
1988 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
1989 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
1990 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
1991 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
1992 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
1993 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
1994 };
1995 int AbsIdx[] = {
1996 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
1997 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
1998 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
1999 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
2000 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
2001 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
2002 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
2003 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
2004 };
2005 for (unsigned i = 0; i < 8; i++) {
2006 if (OperandIdx[i] < 0)
2007 return Node;
2008 SDValue &Src = Ops[OperandIdx[i] - 1];
2009 SDValue &Neg = Ops[NegIdx[i] - 1];
2010 SDValue &Abs = Ops[AbsIdx[i] - 1];
2011 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2012 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2013 if (HasDst)
2014 SelIdx--;
2015 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002016 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2017 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2018 }
2019 } else if (Opcode == AMDGPU::REG_SEQUENCE) {
2020 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2021 SDValue &Src = Ops[i];
2022 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002023 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2024 }
Vincent Lejeune0167a312013-09-12 23:45:00 +00002025 } else if (Opcode == AMDGPU::CLAMP_R600) {
2026 SDValue Src = Node->getOperand(0);
2027 if (!Src.isMachineOpcode() ||
2028 !TII->hasInstrModifiers(Src.getMachineOpcode()))
2029 return Node;
2030 int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
2031 AMDGPU::OpName::clamp);
2032 if (ClampIdx < 0)
2033 return Node;
2034 std::vector<SDValue> Ops;
2035 unsigned NumOp = Src.getNumOperands();
2036 for(unsigned i = 0; i < NumOp; ++i)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00002037 Ops.push_back(Src.getOperand(i));
Vincent Lejeune0167a312013-09-12 23:45:00 +00002038 Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
2039 return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
2040 Node->getVTList(), Ops);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002041 } else {
2042 if (!TII->hasInstrModifiers(Opcode))
2043 return Node;
2044 int OperandIdx[] = {
2045 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2046 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2047 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
2048 };
2049 int NegIdx[] = {
2050 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
2051 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
2052 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
2053 };
2054 int AbsIdx[] = {
2055 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
2056 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
2057 -1
2058 };
2059 for (unsigned i = 0; i < 3; i++) {
2060 if (OperandIdx[i] < 0)
2061 return Node;
2062 SDValue &Src = Ops[OperandIdx[i] - 1];
2063 SDValue &Neg = Ops[NegIdx[i] - 1];
2064 SDValue FakeAbs;
2065 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2066 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2067 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002068 int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
2069 if (HasDst) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002070 SelIdx--;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002071 ImmIdx--;
2072 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002073 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002074 SDValue &Imm = Ops[ImmIdx];
2075 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002076 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2077 }
2078 }
2079
2080 return Node;
2081}