Tom Stellard75aadc22012-12-11 21:25:42 +00001//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Custom DAG lowering for R600
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000016#include "AMDGPUFrameLowering.h"
Matt Arsenaultc791f392014-06-23 18:00:31 +000017#include "AMDGPUIntrinsicInfo.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000018#include "AMDGPUSubtarget.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000019#include "R600Defines.h"
20#include "R600InstrInfo.h"
21#include "R600MachineFunctionInfo.h"
Tom Stellard067c8152014-07-21 14:01:14 +000022#include "llvm/Analysis/ValueTracking.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000023#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000024#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000025#include "llvm/CodeGen/MachineInstrBuilder.h"
26#include "llvm/CodeGen/MachineRegisterInfo.h"
27#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000028#include "llvm/IR/Argument.h"
29#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000030
31using namespace llvm;
32
R600TargetLowering::R600TargetLowering(TargetMachine &TM,
                                       const AMDGPUSubtarget &STI)
    : AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) {
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
  addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);

  computeRegisterProperties(STI.getRegisterInfo());

  // Set condition code actions
  setCondCodeAction(ISD::SETO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::f32, Expand);

  setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::i32, Expand);

  setOperationAction(ISD::FCOS, MVT::f32, Custom);
  setOperationAction(ISD::FSIN, MVT::f32, Custom);

  setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
  setOperationAction(ISD::SETCC, MVT::v2i32, Expand);

  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
  setOperationAction(ISD::SELECT, MVT::v4i32, Expand);

  // ADD, SUB overflow.
  // TODO: turn these into Legal?
  if (Subtarget->hasCARRY())
    setOperationAction(ISD::UADDO, MVT::i32, Custom);

  if (Subtarget->hasBORROW())
    setOperationAction(ISD::USUBO, MVT::i32, Custom);

  // Expand sign extension of vectors
  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);

  // Legalize loads and stores to the private address space.
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);

  // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
  // spaces, so it is custom lowered to handle those where it isn't.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
  }

  setOperationAction(ISD::STORE, MVT::i8, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);
  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
  setTruncStoreAction(MVT::i32, MVT::i16, Custom);

  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);

  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);

  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::FP_TO_SINT);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::SELECT_CC);
  setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);

  // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
  // to be Legal/Custom in order to avoid library calls.
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);

  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
  for (MVT VT : ScalarIntVTs) {
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::SUBE, VT, Expand);
  }

  setSchedulingPreference(Sched::Source);
}

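// Return true if the instruction immediately following \p I is a RETURN; this
// is used below to decide whether the End-Of-Program bit should be set on
// exports and RAT writes.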
static inline bool isEOP(MachineBasicBlock::iterator I) {
  return std::next(I)->getOpcode() == AMDGPU::RETURN;
}

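// Expand pseudo-instructions that need basic-block context into real R600
// machine instructions: flag-carrying MOV variants, texture-gradient
// sequences, branches, exports, and RETURN live-out bookkeeping.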
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(Subtarget->getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // Replace LDS_*_RET instruction that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI->getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
      //        LDS_1A2D support and remove this special case.
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()) ||
          MI->getOpcode() == AMDGPU::LDS_CMPST_RET)
        return BB;

      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
                       MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(isEOP(I)); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
        .addOperand(MI->getOperand(3))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
        .addOperand(MI->getOperand(2))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW)
        .addReg(T0, RegState::Implicit)
        .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
        .addOperand(MI->getOperand(3))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
        .addOperand(MI->getOperand(2))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW)
        .addReg(T0, RegState::Implicit)
        .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
        .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .addOperand(MI->getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO_INT)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .addOperand(MI->getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if it's not the last one of its type.
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = isEOP(I);
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz) ? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addOperand(MI->getOperand(2))
        .addOperand(MI->getOperand(3))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5))
        .addOperand(MI->getOperand(6))
        .addImm(CfInst)
        .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  MI->eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
  case ISD::SRA_PARTS:
  case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
  case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
  case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
  case ISD::FCOS:
  case ISD::FSIN: return LowerTrig(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: {
    SDValue Result = LowerLOAD(Op, DAG);
    assert((!Result.getNode() ||
            Result.getNode()->getNumValues() == 2) &&
           "Load should return a value and a chain");
    return Result;
  }

  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MFI->LiveOuts.push_back(Reg);
      return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
    }
    case AMDGPUIntrinsic::R600_store_swizzle: {
      SDLoc DL(Op);
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, DL, MVT::i32), // SWZ_X
        DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
        DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
        DAG.getConstant(3, DL, MVT::i32)  // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::EXPORT, DL, Op.getValueType(), Args);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch (IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      MRI.addLiveIn(Reg);
      return DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), Reg, VT);
    }

    case AMDGPUIntrinsic::R600_interp_input: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
      MachineSDNode *interp;
      if (ijb < 0) {
        const R600InstrInfo *TII =
            static_cast<const R600InstrInfo *>(Subtarget->getInstrInfo());
        interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
            MVT::v4f32, DAG.getTargetConstant(slot / 4, DL, MVT::i32));
        return DAG.getTargetExtractSubreg(
            TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
            DL, MVT::f32, SDValue(interp, 0));
      }
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
      unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
      MRI.addLiveIn(RegisterI);
      MRI.addLiveIn(RegisterJ);
      SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
      SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);

      if (slot % 4 < 2)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4, DL, MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4, DL, MVT::i32),
            RegisterJNode, RegisterINode);
      return SDValue(interp, slot % 2);
    }
    case AMDGPUIntrinsic::R600_interp_xy:
    case AMDGPUIntrinsic::R600_interp_zw: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      MachineSDNode *interp;
      SDValue RegisterINode = Op.getOperand(2);
      SDValue RegisterJNode = Op.getOperand(3);

      if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
            RegisterJNode, RegisterINode);
      return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
          SDValue(interp, 0), SDValue(interp, 1));
    }
    case AMDGPUIntrinsic::R600_tex:
    case AMDGPUIntrinsic::R600_texc:
    case AMDGPUIntrinsic::R600_txl:
    case AMDGPUIntrinsic::R600_txlc:
    case AMDGPUIntrinsic::R600_txb:
    case AMDGPUIntrinsic::R600_txbc:
    case AMDGPUIntrinsic::R600_txf:
    case AMDGPUIntrinsic::R600_txq:
    case AMDGPUIntrinsic::R600_ddx:
    case AMDGPUIntrinsic::R600_ddy:
    case AMDGPUIntrinsic::R600_ldptr: {
      unsigned TextureOp;
      switch (IntrinsicID) {
      case AMDGPUIntrinsic::R600_tex:
        TextureOp = 0;
        break;
      case AMDGPUIntrinsic::R600_texc:
        TextureOp = 1;
        break;
      case AMDGPUIntrinsic::R600_txl:
        TextureOp = 2;
        break;
      case AMDGPUIntrinsic::R600_txlc:
        TextureOp = 3;
        break;
      case AMDGPUIntrinsic::R600_txb:
        TextureOp = 4;
        break;
      case AMDGPUIntrinsic::R600_txbc:
        TextureOp = 5;
        break;
      case AMDGPUIntrinsic::R600_txf:
        TextureOp = 6;
        break;
      case AMDGPUIntrinsic::R600_txq:
        TextureOp = 7;
        break;
      case AMDGPUIntrinsic::R600_ddx:
        TextureOp = 8;
        break;
      case AMDGPUIntrinsic::R600_ddy:
        TextureOp = 9;
        break;
      case AMDGPUIntrinsic::R600_ldptr:
        TextureOp = 10;
        break;
      default:
        llvm_unreachable("Unknown Texture Operation");
      }

      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, DL, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(1, DL, MVT::i32),
        DAG.getConstant(2, DL, MVT::i32),
        DAG.getConstant(3, DL, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(1, DL, MVT::i32),
        DAG.getConstant(2, DL, MVT::i32),
        DAG.getConstant(3, DL, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
    }
    case AMDGPUIntrinsic::AMDGPU_dp4: {
      SDValue Args[8] = {
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(0, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(0, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(1, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(1, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(2, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(2, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(3, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(3, DL, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
    }

    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    case Intrinsic::AMDGPU_read_workdim: {
      uint32_t ByteOffset = getImplicitParameterOffset(MFI, GRID_DIM);
      return LowerImplicitParameter(DAG, VT, DL, ByteOffset / 4);
    }

    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    case Intrinsic::AMDGPU_rsq:
      // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
      return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));

    case AMDGPUIntrinsic::AMDGPU_fract:
    case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
      return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
    return;
  case ISD::FP_TO_UINT:
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
      return;
    }
    // Fall-through. Since we don't care about out of bounds values
    // we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint
    // considers some extra cases which are not necessary here.
  case ISD::FP_TO_SINT: {
    SDValue Result;
    if (expandFP_TO_SINT(N, Result, DAG))
      Results.push_back(Result);
    return;
  }
  case ISD::SDIVREM: {
    SDValue Op = SDValue(N, 1);
    SDValue RES = LowerSDIVREM(Op, DAG);
    Results.push_back(RES);
    Results.push_back(RES.getValue(1));
    break;
  }
  case ISD::UDIVREM: {
    SDValue Op = SDValue(N, 0);
    LowerUDIVREM64(Op, DAG, Results);
    break;
  }
  }
}

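// Rebuild \p Vector from its extracted elements as a BUILD_VERTICAL_VECTOR.
// The EXTRACT/INSERT_VECTOR_ELT lowerings below use this when the element
// index is not a constant, so the element can be reached with indirect
// register addressing.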
SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
                                                   SDValue Vector) const {

  SDLoc DL(Vector);
  EVT VecVT = Vector.getValueType();
  EVT EltVT = VecVT.getVectorElementType();
  SmallVector<SDValue, 8> Args;

  for (unsigned i = 0, e = VecVT.getVectorNumElements(); i != e; ++i) {
    Args.push_back(DAG.getNode(
        ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
        DAG.getConstant(i, DL, getVectorIdxTy(DAG.getDataLayout()))));
  }

  return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
}

SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                    SelectionDAG &DAG) const {

  SDLoc DL(Op);
  SDValue Vector = Op.getOperand(0);
  SDValue Index = Op.getOperand(1);

  if (isa<ConstantSDNode>(Index) ||
      Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    return Op;

  Vector = vectorToVerticalVector(DAG, Vector);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
                     Vector, Index);
}

SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vector = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Index = Op.getOperand(2);

  if (isa<ConstantSDNode>(Index) ||
      Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    return Op;

  Vector = vectorToVerticalVector(DAG, Vector);
  SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
                               Vector, Value, Index);
  return vectorToVerticalVector(DAG, Insert);
}

SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
  // On hw >= R700, COS/SIN input must be between -1. and 1.
  // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
  EVT VT = Op.getValueType();
  SDValue Arg = Op.getOperand(0);
  SDLoc DL(Op);

  // TODO: Should this propagate fast-math-flags?
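  // 0.15915494309 is 1/(2*Pi), so the argument is converted into fractions of
  // a full turn before FRACT is applied.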
  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT,
          DAG.getNode(ISD::FMUL, DL, VT, Arg,
              DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
          DAG.getConstantFP(0.5, DL, MVT::f32)));
  unsigned TrigNode;
  switch (Op.getOpcode()) {
  case ISD::FCOS:
    TrigNode = AMDGPUISD::COS_HW;
    break;
  case ISD::FSIN:
    TrigNode = AMDGPUISD::SIN_HW;
    break;
  default:
    llvm_unreachable("Wrong trig opcode");
  }
  SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT, FractPart,
          DAG.getConstantFP(-0.5, DL, MVT::f32)));
  if (Gen >= AMDGPUSubtarget::R700)
    return TrigVal;
  // On R600 hw, COS/SIN input must be between -Pi and Pi.
  return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
      DAG.getConstantFP(3.14159265359, DL, MVT::f32));
}

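// Lower SHL_PARTS: a 64-bit shift split over two i32 halves {Lo, Hi}. For
// shift amounts below 32 the bits that spill out of Lo are OR'd into Hi; for
// amounts of 32 or more Lo becomes zero and Hi receives Lo shifted by
// (Shift - 32). The selects on Shift < Width pick between the two cases.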
SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One  = DAG.getConstant(1, DL, VT);

  SDValue Width  = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  SDValue BigShift  = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps, the alternative is to
  // add a conditional to filter the special case.

  SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
  Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);

  SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
  HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
  SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);

  SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
  SDValue LoBig = Zero;

  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Lo, Hi);
}

SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One  = DAG.getConstant(1, DL, VT);

  const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;

  SDValue Width  = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  SDValue BigShift  = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps, the alternative is to
  // add a conditional to filter the special case.

  SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
  Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);

  SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
  SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
  LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);

  SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
  SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;

  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Lo, Hi);
}

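// Lower UADDO/USUBO: the arithmetic result comes from a plain ADD/SUB, and the
// overflow flag from the hardware CARRY/BORROW node, sign-extended from its
// low bit so the flag is either all-zeros or all-ones.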
SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
                                          unsigned mainop, unsigned ovf) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);

  SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
  // Extend sign.
  OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
                    DAG.getValueType(MVT::i1));

  SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
}

SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(
      ISD::SETCC,
      DL,
      MVT::i1,
      Op, DAG.getConstantFP(0.0f, DL, MVT::f32),
      DAG.getCondCode(ISD::SETNE)
      );
}

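// Implicit kernel parameters (ngroups, global/local sizes, and the like) live
// at fixed dword offsets in constant buffer 0; reading one is lowered to a
// load from that buffer at DwordOffset * 4.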
SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   SDLoc DL,
                                                   unsigned DwordOffset) const {
  unsigned ByteOffset = DwordOffset * 4;
  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                           AMDGPUAS::CONSTANT_BUFFER_0);

  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)),
                     false, false, false, 0);
}

Tom Stellard75aadc22012-12-11 21:25:42 +00001098bool R600TargetLowering::isZero(SDValue Op) const {
1099 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
1100 return Cst->isNullValue();
1101 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
1102 return CstFP->isZero();
1103 } else {
1104 return false;
1105 }
1106}
1107
1108SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001109 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001110 EVT VT = Op.getValueType();
1111
1112 SDValue LHS = Op.getOperand(0);
1113 SDValue RHS = Op.getOperand(1);
1114 SDValue True = Op.getOperand(2);
1115 SDValue False = Op.getOperand(3);
1116 SDValue CC = Op.getOperand(4);
1117 SDValue Temp;
1118
Matt Arsenault1e3a4eb2014-12-12 02:30:37 +00001119 if (VT == MVT::f32) {
1120 DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
1121 SDValue MinMax = CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
1122 if (MinMax)
1123 return MinMax;
1124 }
1125
Tom Stellard75aadc22012-12-11 21:25:42 +00001126 // LHS and RHS are guaranteed to be the same value type
1127 EVT CompareVT = LHS.getValueType();
1128
1129 // Check if we can lower this to a native operation.
1130
Tom Stellard2add82d2013-03-08 15:37:09 +00001131 // Try to lower to a SET* instruction:
1132 //
1133 // SET* can match the following patterns:
1134 //
Tom Stellardcd428182013-09-28 02:50:38 +00001135 // select_cc f32, f32, -1, 0, cc_supported
1136 // select_cc f32, f32, 1.0f, 0.0f, cc_supported
1137 // select_cc i32, i32, -1, 0, cc_supported
Tom Stellard2add82d2013-03-08 15:37:09 +00001138 //
1139
1140 // Move hardware True/False values to the correct operand.
Tom Stellardcd428182013-09-28 02:50:38 +00001141 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
1142 ISD::CondCode InverseCC =
1143 ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
Tom Stellard5694d302013-09-28 02:50:43 +00001144 if (isHWTrueValue(False) && isHWFalseValue(True)) {
1145 if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
1146 std::swap(False, True);
1147 CC = DAG.getCondCode(InverseCC);
1148 } else {
1149 ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
1150 if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
1151 std::swap(False, True);
1152 std::swap(LHS, RHS);
1153 CC = DAG.getCondCode(SwapInvCC);
1154 }
1155 }
Tom Stellard2add82d2013-03-08 15:37:09 +00001156 }
1157
1158 if (isHWTrueValue(True) && isHWFalseValue(False) &&
1159 (CompareVT == VT || VT == MVT::i32)) {
1160 // This can be matched by a SET* instruction.
1161 return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
1162 }
1163
Tom Stellard75aadc22012-12-11 21:25:42 +00001164 // Try to lower to a CND* instruction:
Tom Stellard2add82d2013-03-08 15:37:09 +00001165 //
1166 // CND* can match the following patterns:
1167 //
Tom Stellardcd428182013-09-28 02:50:38 +00001168 // select_cc f32, 0.0, f32, f32, cc_supported
1169 // select_cc f32, 0.0, i32, i32, cc_supported
1170 // select_cc i32, 0, f32, f32, cc_supported
1171 // select_cc i32, 0, i32, i32, cc_supported
Tom Stellard2add82d2013-03-08 15:37:09 +00001172 //
Tom Stellardcd428182013-09-28 02:50:38 +00001173
1174 // Try to move the zero value to the RHS
1175 if (isZero(LHS)) {
1176 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
1177 // Try swapping the operands
1178 ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
1179 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
1180 std::swap(LHS, RHS);
1181 CC = DAG.getCondCode(CCSwapped);
1182 } else {
1183 // Try inverting the conditon and then swapping the operands
1184 ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
1185 CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
1186 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
1187 std::swap(True, False);
1188 std::swap(LHS, RHS);
1189 CC = DAG.getCondCode(CCSwapped);
1190 }
1191 }
1192 }
1193 if (isZero(RHS)) {
1194 SDValue Cond = LHS;
1195 SDValue Zero = RHS;
Tom Stellard75aadc22012-12-11 21:25:42 +00001196 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
1197 if (CompareVT != VT) {
1198 // Bitcast True / False to the correct types. This will end up being
1199 // a nop, but it allows us to define only a single pattern in the
1200 // .TD files for each CND* instruction rather than having to have
1201 // one pattern for integer True/False and one for fp True/False
1202 True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
1203 False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
1204 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001205
1206 switch (CCOpcode) {
1207 case ISD::SETONE:
1208 case ISD::SETUNE:
1209 case ISD::SETNE:
Tom Stellard75aadc22012-12-11 21:25:42 +00001210 CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
1211 Temp = True;
1212 True = False;
1213 False = Temp;
1214 break;
1215 default:
1216 break;
1217 }
1218 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
1219 Cond, Zero,
1220 True, False,
1221 DAG.getCondCode(CCOpcode));
1222 return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
1223 }
1224
Tom Stellard75aadc22012-12-11 21:25:42 +00001225 // If we make it this far, it means we have no native instructions to handle
1226 // this SELECT_CC, so we must lower it.
1227 SDValue HWTrue, HWFalse;
1228
1229 if (CompareVT == MVT::f32) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001230 HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
1231 HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
Tom Stellard75aadc22012-12-11 21:25:42 +00001232 } else if (CompareVT == MVT::i32) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001233 HWTrue = DAG.getConstant(-1, DL, CompareVT);
1234 HWFalse = DAG.getConstant(0, DL, CompareVT);
Tom Stellard75aadc22012-12-11 21:25:42 +00001235 }
1236 else {
Matt Arsenaulteaa3a7e2013-12-10 21:37:42 +00001237 llvm_unreachable("Unhandled value type in LowerSELECT_CC");
Tom Stellard75aadc22012-12-11 21:25:42 +00001238 }
1239
1240 // Lower this unsupported SELECT_CC into a combination of two supported
1241 // SELECT_CC operations.
1242 SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
1243
1244 return DAG.getNode(ISD::SELECT_CC, DL, VT,
1245 Cond, HWFalse,
1246 True, False,
1247 DAG.getCondCode(ISD::SETNE));
1248}
1249
Alp Tokercb402912014-01-24 17:20:08 +00001250/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001251/// convert these pointers to a register index. Each register holds
1252/// 16 bytes (4 x 32-bit sub-registers), but we need to take into account the
1253/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
1254/// for indirect addressing.
1255SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1256 unsigned StackWidth,
1257 SelectionDAG &DAG) const {
1258 unsigned SRLPad;
1259 switch(StackWidth) {
1260 case 1:
1261 SRLPad = 2;
1262 break;
1263 case 2:
1264 SRLPad = 3;
1265 break;
1266 case 4:
1267 SRLPad = 4;
1268 break;
1269 default: llvm_unreachable("Invalid stack width");
1270 }
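  // Put differently: a stack width of 1 packs one 32-bit channel per slot
  // (4 bytes, shift by 2), a width of 2 packs two channels (8 bytes, shift
  // by 3), and a width of 4 packs a full 128-bit register (16 bytes, shift
  // by 4). For example, byte offset 24 with StackWidth == 2 maps to
  // register index 3.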
1271
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001272 SDLoc DL(Ptr);
1273 return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
1274 DAG.getConstant(SRLPad, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001275}
1276
1277void R600TargetLowering::getStackAddress(unsigned StackWidth,
1278 unsigned ElemIdx,
1279 unsigned &Channel,
1280 unsigned &PtrIncr) const {
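  // Note that PtrIncr is an increment the callers accumulate into Ptr, so
  // with StackWidth == 2, for example, elements 0 and 1 land in channels 0
  // and 1 of the current register and elements 2 and 3 in channels 0 and 1
  // of the next one.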
1281 switch (StackWidth) {
1282 default:
1283 case 1:
1284 Channel = 0;
1285 if (ElemIdx > 0) {
1286 PtrIncr = 1;
1287 } else {
1288 PtrIncr = 0;
1289 }
1290 break;
1291 case 2:
1292 Channel = ElemIdx % 2;
1293 if (ElemIdx == 2) {
1294 PtrIncr = 1;
1295 } else {
1296 PtrIncr = 0;
1297 }
1298 break;
1299 case 4:
1300 Channel = ElemIdx;
1301 PtrIncr = 0;
1302 break;
1303 }
1304}
1305
Tom Stellard75aadc22012-12-11 21:25:42 +00001306SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001307 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001308 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1309 SDValue Chain = Op.getOperand(0);
1310 SDValue Value = Op.getOperand(1);
1311 SDValue Ptr = Op.getOperand(2);
1312
Tom Stellard2ffc3302013-08-26 15:05:44 +00001313 SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
Tom Stellardfbab8272013-08-16 01:12:11 +00001314 if (Result.getNode()) {
1315 return Result;
1316 }
1317
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001318 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
1319 if (StoreNode->isTruncatingStore()) {
1320 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001321 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001322 EVT MemVT = StoreNode->getMemoryVT();
1323 SDValue MaskConstant;
1324 if (MemVT == MVT::i8) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001325 MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001326 } else {
1327 assert(MemVT == MVT::i16);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001328 MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001329 }
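      // Sketch of the rewrite (illustrative): an i8 store of Value to byte
      // address Ptr becomes a masked dword write where
      //   DWordAddr = Ptr >> 2   (which 32-bit word)
      //   ByteIndex = Ptr & 3    (which byte within that word)
      //   Shift     = ByteIndex * 8
      // and STORE_MSKOR writes (Value & 0xFF) << Shift under the mask
      // 0xFF << Shift so the remaining bytes of the word are preserved.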
1330 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001331 DAG.getConstant(2, DL, MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001332 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001333 DAG.getConstant(0x00000003, DL, VT));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001334 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1335 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001336 DAG.getConstant(3, DL, VT));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001337 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1338 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
1339 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1340 // vector instead.
1341 SDValue Src[4] = {
1342 ShiftedValue,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001343 DAG.getConstant(0, DL, MVT::i32),
1344 DAG.getConstant(0, DL, MVT::i32),
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001345 Mask
1346 };
Craig Topper48d114b2014-04-26 18:35:24 +00001347 SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001348 SDValue Args[3] = { Chain, Input, DWordAddr };
1349 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
Craig Topper206fcd42014-04-26 19:29:41 +00001350 Op->getVTList(), Args, MemVT,
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001351 StoreNode->getMemOperand());
1352 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
1353 Value.getValueType().bitsGE(MVT::i32)) {
1354 // Convert pointer from byte address to dword address.
1355 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1356 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001357 Ptr, DAG.getConstant(2, DL, MVT::i32)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001358
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001359 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
Matt Arsenaulteaa3a7e2013-12-10 21:37:42 +00001360 llvm_unreachable("Truncated and indexed stores not supported yet");
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001361 } else {
1362 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1363 }
1364 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001365 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001366 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001367
1368 EVT ValueVT = Value.getValueType();
1369
1370 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1371 return SDValue();
1372 }
1373
Tom Stellarde9373602014-01-22 19:24:14 +00001374 SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
1375 if (Ret.getNode()) {
1376 return Ret;
1377 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001378 // Lowering for indirect addressing
1379
1380 const MachineFunction &MF = DAG.getMachineFunction();
Eric Christopher7792e322015-01-30 23:24:40 +00001381 const AMDGPUFrameLowering *TFL =
1382 static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001383 unsigned StackWidth = TFL->getStackWidth(MF);
1384
1385 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1386
1387 if (ValueVT.isVector()) {
1388 unsigned NumElemVT = ValueVT.getVectorNumElements();
1389 EVT ElemVT = ValueVT.getVectorElementType();
Craig Topper48d114b2014-04-26 18:35:24 +00001390 SmallVector<SDValue, 4> Stores(NumElemVT);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001391
1392 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1393 "vector width in store");
1394
1395 for (unsigned i = 0; i < NumElemVT; ++i) {
1396 unsigned Channel, PtrIncr;
1397 getStackAddress(StackWidth, i, Channel, PtrIncr);
1398 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001399 DAG.getConstant(PtrIncr, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001400 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001401 Value, DAG.getConstant(i, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001402
1403 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1404 Chain, Elem, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001405 DAG.getTargetConstant(Channel, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001406 }
Craig Topper48d114b2014-04-26 18:35:24 +00001407 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001408 } else {
1409 if (ValueVT == MVT::i8) {
1410 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1411 }
1412 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001413 DAG.getTargetConstant(0, DL, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001414 }
1415
1416 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001417}
1418
Tom Stellard365366f2013-01-23 02:09:06 +00001419// return (512 + (kc_bank << 12))
1420static int
1421ConstantAddressBlock(unsigned AddressSpace) {
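  // Successive constant buffers are 4096 apart (kc_bank << 12), so e.g.
  // CONSTANT_BUFFER_2 maps to 512 + 2 * 4096 = 8704.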
1422 switch (AddressSpace) {
1423 case AMDGPUAS::CONSTANT_BUFFER_0:
1424 return 512;
1425 case AMDGPUAS::CONSTANT_BUFFER_1:
1426 return 512 + 4096;
1427 case AMDGPUAS::CONSTANT_BUFFER_2:
1428 return 512 + 4096 * 2;
1429 case AMDGPUAS::CONSTANT_BUFFER_3:
1430 return 512 + 4096 * 3;
1431 case AMDGPUAS::CONSTANT_BUFFER_4:
1432 return 512 + 4096 * 4;
1433 case AMDGPUAS::CONSTANT_BUFFER_5:
1434 return 512 + 4096 * 5;
1435 case AMDGPUAS::CONSTANT_BUFFER_6:
1436 return 512 + 4096 * 6;
1437 case AMDGPUAS::CONSTANT_BUFFER_7:
1438 return 512 + 4096 * 7;
1439 case AMDGPUAS::CONSTANT_BUFFER_8:
1440 return 512 + 4096 * 8;
1441 case AMDGPUAS::CONSTANT_BUFFER_9:
1442 return 512 + 4096 * 9;
1443 case AMDGPUAS::CONSTANT_BUFFER_10:
1444 return 512 + 4096 * 10;
1445 case AMDGPUAS::CONSTANT_BUFFER_11:
1446 return 512 + 4096 * 11;
1447 case AMDGPUAS::CONSTANT_BUFFER_12:
1448 return 512 + 4096 * 12;
1449 case AMDGPUAS::CONSTANT_BUFFER_13:
1450 return 512 + 4096 * 13;
1451 case AMDGPUAS::CONSTANT_BUFFER_14:
1452 return 512 + 4096 * 14;
1453 case AMDGPUAS::CONSTANT_BUFFER_15:
1454 return 512 + 4096 * 15;
1455 default:
1456 return -1;
1457 }
1458}
1459
1460SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
1461{
1462 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001463 SDLoc DL(Op);
Tom Stellard365366f2013-01-23 02:09:06 +00001464 LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
1465 SDValue Chain = Op.getOperand(0);
1466 SDValue Ptr = Op.getOperand(1);
1467 SDValue LoweredLoad;
1468
Matt Arsenault8b03e6c2015-07-09 18:47:03 +00001469 if (SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG))
1470 return Ret;
Tom Stellarde9373602014-01-22 19:24:14 +00001471
Tom Stellard067c8152014-07-21 14:01:14 +00001472 // Lower loads from the constant address space that access global variables
1473 if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
Mehdi Aminia28d91d2015-03-10 02:37:25 +00001474 isa<GlobalVariable>(GetUnderlyingObject(
Mehdi Amini44ede332015-07-09 02:09:04 +00001475 LoadNode->getMemOperand()->getValue(), DAG.getDataLayout()))) {
Tom Stellard067c8152014-07-21 14:01:14 +00001476
Mehdi Amini44ede332015-07-09 02:09:04 +00001477 SDValue Ptr = DAG.getZExtOrTrunc(
1478 LoadNode->getBasePtr(), DL,
1479 getPointerTy(DAG.getDataLayout(), AMDGPUAS::PRIVATE_ADDRESS));
Tom Stellard067c8152014-07-21 14:01:14 +00001480 Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001481 DAG.getConstant(2, DL, MVT::i32));
Tom Stellard067c8152014-07-21 14:01:14 +00001482 return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op->getVTList(),
1483 LoadNode->getChain(), Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001484 DAG.getTargetConstant(0, DL, MVT::i32),
1485 Op.getOperand(2));
Tom Stellard067c8152014-07-21 14:01:14 +00001486 }
Tom Stellarde9373602014-01-22 19:24:14 +00001487
Tom Stellard35bb18c2013-08-26 15:06:04 +00001488 if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
1489 SDValue MergedValues[2] = {
Matt Arsenault83e60582014-07-24 17:10:35 +00001490 ScalarizeVectorLoad(Op, DAG),
Tom Stellard35bb18c2013-08-26 15:06:04 +00001491 Chain
1492 };
Craig Topper64941d92014-04-27 19:20:57 +00001493 return DAG.getMergeValues(MergedValues, DL);
Tom Stellard35bb18c2013-08-26 15:06:04 +00001494 }
1495
Tom Stellard365366f2013-01-23 02:09:06 +00001496 int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
Matt Arsenault00a0d6f2013-11-13 02:39:07 +00001497 if (ConstantBlock > -1 &&
1498 ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
1499 (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
Tom Stellard365366f2013-01-23 02:09:06 +00001500 SDValue Result;
Nick Lewyckyaad475b2014-04-15 07:22:52 +00001501 if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
1502 isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
Matt Arsenaultef1a9502013-11-01 17:39:26 +00001503 isa<ConstantSDNode>(Ptr)) {
Tom Stellard365366f2013-01-23 02:09:06 +00001504 SDValue Slots[4];
1505 for (unsigned i = 0; i < 4; i++) {
1506 // We want the Const position encoded with the following formula:
1507 // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
1508 // const_index is Ptr computed by llvm using an alignment of 16.
1509 // Thus we add (((512 + (kc_bank << 12)) << 2) + chan) * 4 here and
1510 // then div by 4 at the ISel step
1511 SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001512 DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
Tom Stellard365366f2013-01-23 02:09:06 +00001513 Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
1514 }
Tom Stellard0344cdf2013-08-01 15:23:42 +00001515 EVT NewVT = MVT::v4i32;
1516 unsigned NumElements = 4;
1517 if (VT.isVector()) {
1518 NewVT = VT;
1519 NumElements = VT.getVectorNumElements();
1520 }
Craig Topper48d114b2014-04-26 18:35:24 +00001521 Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT,
Craig Topper2d2aa0c2014-04-30 07:17:30 +00001522 makeArrayRef(Slots, NumElements));
Tom Stellard365366f2013-01-23 02:09:06 +00001523 } else {
Alp Tokerf907b892013-12-05 05:44:44 +00001524 // A non-constant ptr can't be folded, so keep it as a v4i32 load
Tom Stellard365366f2013-01-23 02:09:06 +00001525 Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001526 DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
1527 DAG.getConstant(4, DL, MVT::i32)),
1528 DAG.getConstant(LoadNode->getAddressSpace() -
1529 AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
Tom Stellard365366f2013-01-23 02:09:06 +00001530 );
1531 }
1532
1533 if (!VT.isVector()) {
1534 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001535 DAG.getConstant(0, DL, MVT::i32));
Tom Stellard365366f2013-01-23 02:09:06 +00001536 }
1537
1538 SDValue MergedValues[2] = {
Matt Arsenault7939acd2014-04-07 16:44:24 +00001539 Result,
1540 Chain
Tom Stellard365366f2013-01-23 02:09:06 +00001541 };
Craig Topper64941d92014-04-27 19:20:57 +00001542 return DAG.getMergeValues(MergedValues, DL);
Tom Stellard365366f2013-01-23 02:09:06 +00001543 }
1544
Matt Arsenault909d0c02013-10-30 23:43:29 +00001545 // For most operations, returning SDValue() will result in the node being
1546 // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
1547 // need to manually expand loads that may be legal in some address spaces and
1548 // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
1549 // compute shaders, since the data is sign extended when it is uploaded to the
1550 // buffer. However SEXT loads from other address spaces are not supported, so
1551 // we need to expand them here.
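  // Roughly, an i8 sextload becomes a plain extending load of the byte
  // followed by a SIGN_EXTEND_INREG with value type i8, which together
  // reproduce the sign extension in the register.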
Tom Stellard84021442013-07-23 01:48:24 +00001552 if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
1553 EVT MemVT = LoadNode->getMemoryVT();
1554 assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
Tom Stellard84021442013-07-23 01:48:24 +00001555 SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
1556 LoadNode->getPointerInfo(), MemVT,
1557 LoadNode->isVolatile(),
1558 LoadNode->isNonTemporal(),
Louis Gerbarg67474e32014-07-31 21:45:05 +00001559 LoadNode->isInvariant(),
Tom Stellard84021442013-07-23 01:48:24 +00001560 LoadNode->getAlignment());
Jan Veselyb670d372015-05-26 18:07:22 +00001561 SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
1562 DAG.getValueType(MemVT));
Tom Stellard84021442013-07-23 01:48:24 +00001563
Jan Veselyb670d372015-05-26 18:07:22 +00001564 SDValue MergedValues[2] = { Res, Chain };
Craig Topper64941d92014-04-27 19:20:57 +00001565 return DAG.getMergeValues(MergedValues, DL);
Tom Stellard84021442013-07-23 01:48:24 +00001566 }
1567
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001568 if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1569 return SDValue();
1570 }
1571
1572 // Lowering for indirect addressing
1573 const MachineFunction &MF = DAG.getMachineFunction();
Eric Christopher7792e322015-01-30 23:24:40 +00001574 const AMDGPUFrameLowering *TFL =
1575 static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001576 unsigned StackWidth = TFL->getStackWidth(MF);
1577
1578 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1579
1580 if (VT.isVector()) {
1581 unsigned NumElemVT = VT.getVectorNumElements();
1582 EVT ElemVT = VT.getVectorElementType();
1583 SDValue Loads[4];
1584
1585 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1586 "vector width in load");
1587
1588 for (unsigned i = 0; i < NumElemVT; ++i) {
1589 unsigned Channel, PtrIncr;
1590 getStackAddress(StackWidth, i, Channel, PtrIncr);
1591 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001592 DAG.getConstant(PtrIncr, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001593 Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
1594 Chain, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001595 DAG.getTargetConstant(Channel, DL, MVT::i32),
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001596 Op.getOperand(2));
1597 }
1598 for (unsigned i = NumElemVT; i < 4; ++i) {
1599 Loads[i] = DAG.getUNDEF(ElemVT);
1600 }
1601 EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
Craig Topper48d114b2014-04-26 18:35:24 +00001602 LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001603 } else {
1604 LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
1605 Chain, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001606 DAG.getTargetConstant(0, DL, MVT::i32), // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001607 Op.getOperand(2));
1608 }
1609
Matt Arsenault7939acd2014-04-07 16:44:24 +00001610 SDValue Ops[2] = {
1611 LoweredLoad,
1612 Chain
1613 };
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001614
Craig Topper64941d92014-04-27 19:20:57 +00001615 return DAG.getMergeValues(Ops, DL);
Tom Stellard365366f2013-01-23 02:09:06 +00001616}
Tom Stellard75aadc22012-12-11 21:25:42 +00001617
Matt Arsenault1d555c42014-06-23 18:00:55 +00001618SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1619 SDValue Chain = Op.getOperand(0);
1620 SDValue Cond = Op.getOperand(1);
1621 SDValue Jump = Op.getOperand(2);
1622
1623 return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1624 Chain, Jump, Cond);
1625}
1626
Tom Stellard75aadc22012-12-11 21:25:42 +00001627/// XXX Only kernel functions are supported, so we can assume for now that
1628/// every function is a kernel function, but in the future we should use
1629/// separate calling conventions for kernel and non-kernel functions.
1630SDValue R600TargetLowering::LowerFormalArguments(
1631 SDValue Chain,
1632 CallingConv::ID CallConv,
1633 bool isVarArg,
1634 const SmallVectorImpl<ISD::InputArg> &Ins,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001635 SDLoc DL, SelectionDAG &DAG,
Tom Stellard75aadc22012-12-11 21:25:42 +00001636 SmallVectorImpl<SDValue> &InVals) const {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001637 SmallVector<CCValAssign, 16> ArgLocs;
Eric Christopherb5217502014-08-06 18:45:26 +00001638 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1639 *DAG.getContext());
Vincent Lejeunef143af32013-11-11 22:10:24 +00001640 MachineFunction &MF = DAG.getMachineFunction();
Jan Veselye5121f32014-10-14 20:05:26 +00001641 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellardacfeebf2013-07-23 01:48:05 +00001642
Tom Stellardaf775432013-10-23 00:44:32 +00001643 SmallVector<ISD::InputArg, 8> LocalIns;
1644
Matt Arsenault209a7b92014-04-18 07:40:20 +00001645 getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);
Tom Stellardaf775432013-10-23 00:44:32 +00001646
1647 AnalyzeFormalArguments(CCInfo, LocalIns);
Tom Stellardacfeebf2013-07-23 01:48:05 +00001648
Tom Stellard1e803092013-07-23 01:48:18 +00001649 for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001650 CCValAssign &VA = ArgLocs[i];
Matt Arsenault74ef2772014-08-13 18:14:11 +00001651 const ISD::InputArg &In = Ins[i];
1652 EVT VT = In.VT;
1653 EVT MemVT = VA.getLocVT();
1654 if (!VT.isVector() && MemVT.isVector()) {
1655 // Get load source type if scalarized.
1656 MemVT = MemVT.getVectorElementType();
1657 }
Tom Stellard78e01292013-07-23 01:47:58 +00001658
Jan Veselye5121f32014-10-14 20:05:26 +00001659 if (MFI->getShaderType() != ShaderType::COMPUTE) {
Vincent Lejeunef143af32013-11-11 22:10:24 +00001660 unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
1661 SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
1662 InVals.push_back(Register);
1663 continue;
1664 }
1665
Tom Stellard75aadc22012-12-11 21:25:42 +00001666 PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Matt Arsenault74ef2772014-08-13 18:14:11 +00001667 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellardacfeebf2013-07-23 01:48:05 +00001668
Matt Arsenaultfae02982014-03-17 18:58:11 +00001669 // i64 isn't a legal type, so the register type used ends up as i32, which
1670 // isn't expected here. It attempts to create this sextload, but it ends up
1671 // being invalid. Somehow this seems to work with i64 arguments, but breaks
1672 // for <1 x i64>.
1673
Tom Stellardacfeebf2013-07-23 01:48:05 +00001674 // The first 36 bytes of the input buffer contain information about
1675 // thread group and global sizes.
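    // So, roughly, the first explicit kernel argument is read from offset 36
    // of the kernel argument constant buffer, each following argument from
    // 36 plus its accumulated offset, and MFI->ABIArgOffset ends up just
    // past the last argument's bytes.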
Matt Arsenault74ef2772014-08-13 18:14:11 +00001676 ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
1677 if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
1678 // FIXME: This should really check the extload type, but the handling of
1679 // extload vector parameters seems to be broken.
Matt Arsenaulte1f030c2014-04-11 20:59:54 +00001680
Matt Arsenault74ef2772014-08-13 18:14:11 +00001681 // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
1682 Ext = ISD::SEXTLOAD;
1683 }
1684
1685 // Compute the offset from the value.
1686 // XXX - I think PartOffset should give you this, but it seems to give the
1687 // size of the register which isn't useful.
1688
Andrew Trick05938a52015-02-16 18:10:47 +00001689 unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
Matt Arsenault74ef2772014-08-13 18:14:11 +00001690 unsigned PartOffset = VA.getLocMemOffset();
Jan Veselye5121f32014-10-14 20:05:26 +00001691 unsigned Offset = 36 + VA.getLocMemOffset();
Matt Arsenault74ef2772014-08-13 18:14:11 +00001692
1693 MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
1694 SDValue Arg = DAG.getLoad(ISD::UNINDEXED, Ext, VT, DL, Chain,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001695 DAG.getConstant(Offset, DL, MVT::i32),
Matt Arsenault74ef2772014-08-13 18:14:11 +00001696 DAG.getUNDEF(MVT::i32),
1697 PtrInfo,
1698 MemVT, false, true, true, 4);
Matt Arsenault209a7b92014-04-18 07:40:20 +00001699
1700 // 4 is the preferred alignment for the CONSTANT memory space.
Tom Stellard75aadc22012-12-11 21:25:42 +00001701 InVals.push_back(Arg);
Jan Veselye5121f32014-10-14 20:05:26 +00001702 MFI->ABIArgOffset = Offset + MemVT.getStoreSize();
Tom Stellard75aadc22012-12-11 21:25:42 +00001703 }
1704 return Chain;
1705}
1706
Mehdi Amini44ede332015-07-09 02:09:04 +00001707EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1708 EVT VT) const {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001709 if (!VT.isVector())
1710 return MVT::i32;
Tom Stellard75aadc22012-12-11 21:25:42 +00001711 return VT.changeVectorElementTypeToInteger();
1712}
1713
Matt Arsenault209a7b92014-04-18 07:40:20 +00001714static SDValue CompactSwizzlableVector(
1715 SelectionDAG &DAG, SDValue VectorEntry,
1716 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001717 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1718 assert(RemapSwizzle.empty());
1719 SDValue NewBldVec[4] = {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001720 VectorEntry.getOperand(0),
1721 VectorEntry.getOperand(1),
1722 VectorEntry.getOperand(2),
1723 VectorEntry.getOperand(3)
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001724 };
1725
1726 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeunefa58a5f2013-10-13 17:56:10 +00001727 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1728 // We mask the write here to teach later passes that the ith element of this
1729 // vector is undef. Thus we can use it to reduce 128-bit register usage,
1730 // break false dependencies and additionally make assembly easier to read.
1731 RemapSwizzle[i] = 7; // SEL_MASK_WRITE
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001732 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1733 if (C->isZero()) {
1734 RemapSwizzle[i] = 4; // SEL_0
1735 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1736 } else if (C->isExactlyValue(1.0)) {
1737 RemapSwizzle[i] = 5; // SEL_1
1738 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1739 }
1740 }
1741
1742 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1743 continue;
1744 for (unsigned j = 0; j < i; j++) {
1745 if (NewBldVec[i] == NewBldVec[j]) {
1746 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1747 RemapSwizzle[i] = j;
1748 break;
1749 }
1750 }
1751 }
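  // As an illustration, a build_vector (x, x, 0.0, 1.0) comes out of this
  // loop with lanes 1-3 replaced by undef and RemapSwizzle holding
  // {1 -> 0, 2 -> 4 (SEL_0), 3 -> 5 (SEL_1)}, so only one real component
  // remains to be emitted.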
1752
1753 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001754 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001755}
1756
Benjamin Kramer193960c2013-06-11 13:32:25 +00001757static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1758 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001759 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1760 assert(RemapSwizzle.empty());
1761 SDValue NewBldVec[4] = {
1762 VectorEntry.getOperand(0),
1763 VectorEntry.getOperand(1),
1764 VectorEntry.getOperand(2),
1765 VectorEntry.getOperand(3)
1766 };
1767 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001768 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001769 RemapSwizzle[i] = i;
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001770 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1771 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1772 ->getZExtValue();
1773 if (i == Idx)
1774 isUnmovable[Idx] = true;
1775 }
1776 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001777
1778 for (unsigned i = 0; i < 4; i++) {
1779 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1780 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1781 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001782 if (isUnmovable[Idx])
1783 continue;
1784 // Swap i and Idx
1785 std::swap(NewBldVec[Idx], NewBldVec[i]);
1786 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1787 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001788 }
1789 }
1790
1791 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001792 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001793}
1794
1795
1796SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001797 SDValue Swz[4], SelectionDAG &DAG,
1798 SDLoc DL) const {
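  // The two passes below first fold undef, constant and duplicate lanes into
  // the special selectors (SEL_0, SEL_1, SEL_MASK_WRITE) or into an earlier
  // identical lane (CompactSwizzlableVector), then move extract_vector_elt
  // lanes back to their source channel where possible (ReorganizeVector).
  // After each pass the caller's Swz[] selectors are remapped to match.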
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001799 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1800 // Old -> New swizzle values
1801 DenseMap<unsigned, unsigned> SwizzleRemap;
1802
1803 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1804 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001805 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001806 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001807 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001808 }
1809
1810 SwizzleRemap.clear();
1811 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1812 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001813 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001814 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001815 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001816 }
1817
1818 return BuildVector;
1819}
1820
1821
Tom Stellard75aadc22012-12-11 21:25:42 +00001822//===----------------------------------------------------------------------===//
1823// Custom DAG Optimizations
1824//===----------------------------------------------------------------------===//
1825
1826SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1827 DAGCombinerInfo &DCI) const {
1828 SelectionDAG &DAG = DCI.DAG;
1829
1830 switch (N->getOpcode()) {
Tom Stellard50122a52014-04-07 19:45:41 +00001831 default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00001832 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1833 case ISD::FP_ROUND: {
1834 SDValue Arg = N->getOperand(0);
1835 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001836 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001837 Arg.getOperand(0));
1838 }
1839 break;
1840 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001841
1842 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1843 // (i32 select_cc f32, f32, -1, 0 cc)
1844 //
1845 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1846 // this to one of the SET*_DX10 instructions.
1847 case ISD::FP_TO_SINT: {
1848 SDValue FNeg = N->getOperand(0);
1849 if (FNeg.getOpcode() != ISD::FNEG) {
1850 return SDValue();
1851 }
1852 SDValue SelectCC = FNeg.getOperand(0);
1853 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1854 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1855 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1856 !isHWTrueValue(SelectCC.getOperand(2)) ||
1857 !isHWFalseValue(SelectCC.getOperand(3))) {
1858 return SDValue();
1859 }
1860
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001861 SDLoc dl(N);
1862 return DAG.getNode(ISD::SELECT_CC, dl, N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001863 SelectCC.getOperand(0), // LHS
1864 SelectCC.getOperand(1), // RHS
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001865 DAG.getConstant(-1, dl, MVT::i32), // True
1866 DAG.getConstant(0, dl, MVT::i32), // False
Tom Stellarde06163a2013-02-07 14:02:35 +00001867 SelectCC.getOperand(4)); // CC
1868
1869 break;
1870 }
Quentin Colombete2e05482013-07-30 00:27:16 +00001871
NAKAMURA Takumi8a046432013-10-28 04:07:38 +00001872 // insert_vector_elt (build_vector elt0, ... , eltN), InVal, idx
1873 // => build_vector elt0, ... , InVal, ... , eltN
Quentin Colombete2e05482013-07-30 00:27:16 +00001874 case ISD::INSERT_VECTOR_ELT: {
1875 SDValue InVec = N->getOperand(0);
1876 SDValue InVal = N->getOperand(1);
1877 SDValue EltNo = N->getOperand(2);
1878 SDLoc dl(N);
1879
1880 // If the inserted element is an UNDEF, just use the input vector.
1881 if (InVal.getOpcode() == ISD::UNDEF)
1882 return InVec;
1883
1884 EVT VT = InVec.getValueType();
1885
1886 // If we can't generate a legal BUILD_VECTOR, exit
1887 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1888 return SDValue();
1889
1890 // Check that we know which element is being inserted
1891 if (!isa<ConstantSDNode>(EltNo))
1892 return SDValue();
1893 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1894
1895 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1896 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1897 // vector elements.
1898 SmallVector<SDValue, 8> Ops;
1899 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1900 Ops.append(InVec.getNode()->op_begin(),
1901 InVec.getNode()->op_end());
1902 } else if (InVec.getOpcode() == ISD::UNDEF) {
1903 unsigned NElts = VT.getVectorNumElements();
1904 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1905 } else {
1906 return SDValue();
1907 }
1908
1909 // Insert the element
1910 if (Elt < Ops.size()) {
1911 // All the operands of BUILD_VECTOR must have the same type;
1912 // we enforce that here.
1913 EVT OpVT = Ops[0].getValueType();
1914 if (InVal.getValueType() != OpVT)
1915 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1916 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
1917 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
1918 Ops[Elt] = InVal;
1919 }
1920
1921 // Return the new vector
Craig Topper48d114b2014-04-26 18:35:24 +00001922 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
Quentin Colombete2e05482013-07-30 00:27:16 +00001923 }
1924
Tom Stellard365366f2013-01-23 02:09:06 +00001925 // Extract_vec (Build_vector) generated by custom lowering
1926 // also needs to be custom combined
1927 case ISD::EXTRACT_VECTOR_ELT: {
1928 SDValue Arg = N->getOperand(0);
1929 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1930 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1931 unsigned Element = Const->getZExtValue();
1932 return Arg->getOperand(Element);
1933 }
1934 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001935 if (Arg.getOpcode() == ISD::BITCAST &&
1936 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1937 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1938 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001939 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00001940 Arg->getOperand(0).getOperand(Element));
1941 }
1942 }
Mehdi Aminie029eae2015-07-16 06:23:12 +00001943 break;
Tom Stellard365366f2013-01-23 02:09:06 +00001944 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001945
1946 case ISD::SELECT_CC: {
Tom Stellardafa8b532014-05-09 16:42:16 +00001947 // Try common optimizations
1948 SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
1949 if (Ret.getNode())
1950 return Ret;
1951
Tom Stellarde06163a2013-02-07 14:02:35 +00001952 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1953 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001954 //
1955 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1956 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001957 SDValue LHS = N->getOperand(0);
1958 if (LHS.getOpcode() != ISD::SELECT_CC) {
1959 return SDValue();
1960 }
1961
1962 SDValue RHS = N->getOperand(1);
1963 SDValue True = N->getOperand(2);
1964 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001965 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001966
1967 if (LHS.getOperand(2).getNode() != True.getNode() ||
1968 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00001969 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00001970 return SDValue();
1971 }
1972
Tom Stellard5e524892013-03-08 15:37:11 +00001973 switch (NCC) {
1974 default: return SDValue();
1975 case ISD::SETNE: return LHS;
1976 case ISD::SETEQ: {
1977 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1978 LHSCC = ISD::getSetCCInverse(LHSCC,
1979 LHS.getOperand(0).getValueType().isInteger());
Tom Stellardcd428182013-09-28 02:50:38 +00001980 if (DCI.isBeforeLegalizeOps() ||
1981 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1982 return DAG.getSelectCC(SDLoc(N),
1983 LHS.getOperand(0),
1984 LHS.getOperand(1),
1985 LHS.getOperand(2),
1986 LHS.getOperand(3),
1987 LHSCC);
1988 break;
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001989 }
Tom Stellard5e524892013-03-08 15:37:11 +00001990 }
Tom Stellardcd428182013-09-28 02:50:38 +00001991 return SDValue();
Tom Stellard5e524892013-03-08 15:37:11 +00001992 }
Tom Stellardfbab8272013-08-16 01:12:11 +00001993
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001994 case AMDGPUISD::EXPORT: {
1995 SDValue Arg = N->getOperand(1);
1996 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1997 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001998
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001999 SDValue NewArgs[8] = {
2000 N->getOperand(0), // Chain
2001 SDValue(),
2002 N->getOperand(2), // ArrayBase
2003 N->getOperand(3), // Type
2004 N->getOperand(4), // SWZ_X
2005 N->getOperand(5), // SWZ_Y
2006 N->getOperand(6), // SWZ_Z
2007 N->getOperand(7) // SWZ_W
2008 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00002009 SDLoc DL(N);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002010 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
Craig Topper48d114b2014-04-26 18:35:24 +00002011 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
Tom Stellarde06163a2013-02-07 14:02:35 +00002012 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002013 case AMDGPUISD::TEXTURE_FETCH: {
2014 SDValue Arg = N->getOperand(1);
2015 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2016 break;
2017
2018 SDValue NewArgs[19] = {
2019 N->getOperand(0),
2020 N->getOperand(1),
2021 N->getOperand(2),
2022 N->getOperand(3),
2023 N->getOperand(4),
2024 N->getOperand(5),
2025 N->getOperand(6),
2026 N->getOperand(7),
2027 N->getOperand(8),
2028 N->getOperand(9),
2029 N->getOperand(10),
2030 N->getOperand(11),
2031 N->getOperand(12),
2032 N->getOperand(13),
2033 N->getOperand(14),
2034 N->getOperand(15),
2035 N->getOperand(16),
2036 N->getOperand(17),
2037 N->getOperand(18),
2038 };
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002039 SDLoc DL(N);
2040 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
2041 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002042 }
Tom Stellard75aadc22012-12-11 21:25:42 +00002043 }
Matt Arsenault5565f65e2014-05-22 18:09:07 +00002044
2045 return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00002046}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002047
2048static bool
2049FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002050 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Eric Christopherfc6de422014-08-05 02:39:49 +00002051 const R600InstrInfo *TII =
2052 static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002053 if (!Src.isMachineOpcode())
2054 return false;
2055 switch (Src.getMachineOpcode()) {
2056 case AMDGPU::FNEG_R600:
2057 if (!Neg.getNode())
2058 return false;
2059 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002060 Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002061 return true;
2062 case AMDGPU::FABS_R600:
2063 if (!Abs.getNode())
2064 return false;
2065 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002066 Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002067 return true;
2068 case AMDGPU::CONST_COPY: {
2069 unsigned Opcode = ParentNode->getMachineOpcode();
2070 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2071
2072 if (!Sel.getNode())
2073 return false;
2074
2075 SDValue CstOffset = Src.getOperand(0);
2076 if (ParentNode->getValueType(0).isVector())
2077 return false;
2078
2079 // Gather constant values
2080 int SrcIndices[] = {
2081 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2082 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2083 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
2084 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2085 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2086 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2087 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2088 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2089 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2090 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2091 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
2092 };
2093 std::vector<unsigned> Consts;
Matt Arsenault4d64f962014-05-12 19:23:21 +00002094 for (int OtherSrcIdx : SrcIndices) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002095 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2096 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2097 continue;
2098 if (HasDst) {
2099 OtherSrcIdx--;
2100 OtherSelIdx--;
2101 }
2102 if (RegisterSDNode *Reg =
2103 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2104 if (Reg->getReg() == AMDGPU::ALU_CONST) {
Matt Arsenaultb3ee3882014-05-12 19:26:38 +00002105 ConstantSDNode *Cst
2106 = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002107 Consts.push_back(Cst->getZExtValue());
2108 }
2109 }
2110 }
2111
Matt Arsenault37c12d72014-05-12 20:42:57 +00002112 ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002113 Consts.push_back(Cst->getZExtValue());
2114 if (!TII->fitsConstReadLimitations(Consts)) {
2115 return false;
2116 }
2117
2118 Sel = CstOffset;
2119 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
2120 return true;
2121 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002122 case AMDGPU::MOV_IMM_I32:
2123 case AMDGPU::MOV_IMM_F32: {
2124 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
2125 uint64_t ImmValue = 0;
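    // A few immediates have dedicated inline registers: e.g. a MOV_IMM_F32
    // of 0.5 folds to the HALF register and an integer 1 to ONE_INT;
    // anything else stays on ALU_LITERAL_X with its raw bits in ImmValue.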
2126
2127
2128 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
2129 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
2130 float FloatValue = FPC->getValueAPF().convertToFloat();
2131 if (FloatValue == 0.0) {
2132 ImmReg = AMDGPU::ZERO;
2133 } else if (FloatValue == 0.5) {
2134 ImmReg = AMDGPU::HALF;
2135 } else if (FloatValue == 1.0) {
2136 ImmReg = AMDGPU::ONE;
2137 } else {
2138 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2139 }
2140 } else {
2141 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
2142 uint64_t Value = C->getZExtValue();
2143 if (Value == 0) {
2144 ImmReg = AMDGPU::ZERO;
2145 } else if (Value == 1) {
2146 ImmReg = AMDGPU::ONE_INT;
2147 } else {
2148 ImmValue = Value;
2149 }
2150 }
2151
2152 // Check that we aren't already using an immediate.
2153 // XXX: It's possible for an instruction to have more than one
2154 // immediate operand, but this is not supported yet.
2155 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
2156 if (!Imm.getNode())
2157 return false;
2158 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
2159 assert(C);
2160 if (C->getZExtValue())
2161 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002162 Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002163 }
2164 Src = DAG.getRegister(ImmReg, MVT::i32);
2165 return true;
2166 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002167 default:
2168 return false;
2169 }
2170}
2171
2172
2173/// \brief Fold the instructions after selecting them
2174SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2175 SelectionDAG &DAG) const {
Eric Christopherfc6de422014-08-05 02:39:49 +00002176 const R600InstrInfo *TII =
2177 static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002178 if (!Node->isMachineOpcode())
2179 return Node;
2180 unsigned Opcode = Node->getMachineOpcode();
2181 SDValue FakeOp;
2182
Benjamin Kramer6cd780f2015-02-17 15:29:18 +00002183 std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002184
2185 if (Opcode == AMDGPU::DOT_4) {
2186 int OperandIdx[] = {
2187 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2188 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2189 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2190 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2191 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2192 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2193 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2194 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00002195 };
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002196 int NegIdx[] = {
2197 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
2198 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
2199 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
2200 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
2201 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
2202 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
2203 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
2204 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
2205 };
2206 int AbsIdx[] = {
2207 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
2208 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
2209 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
2210 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
2211 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
2212 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
2213 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
2214 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
2215 };
2216 for (unsigned i = 0; i < 8; i++) {
2217 if (OperandIdx[i] < 0)
2218 return Node;
2219 SDValue &Src = Ops[OperandIdx[i] - 1];
2220 SDValue &Neg = Ops[NegIdx[i] - 1];
2221 SDValue &Abs = Ops[AbsIdx[i] - 1];
2222 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2223 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2224 if (HasDst)
2225 SelIdx--;
2226 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002227 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2228 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2229 }
2230 } else if (Opcode == AMDGPU::REG_SEQUENCE) {
2231 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2232 SDValue &Src = Ops[i];
2233 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002234 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2235 }
Vincent Lejeune0167a312013-09-12 23:45:00 +00002236 } else if (Opcode == AMDGPU::CLAMP_R600) {
2237 SDValue Src = Node->getOperand(0);
2238 if (!Src.isMachineOpcode() ||
2239 !TII->hasInstrModifiers(Src.getMachineOpcode()))
2240 return Node;
2241 int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
2242 AMDGPU::OpName::clamp);
2243 if (ClampIdx < 0)
2244 return Node;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002245 SDLoc DL(Node);
Benjamin Kramer6cd780f2015-02-17 15:29:18 +00002246 std::vector<SDValue> Ops(Src->op_begin(), Src->op_end());
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002247 Ops[ClampIdx - 1] = DAG.getTargetConstant(1, DL, MVT::i32);
2248 return DAG.getMachineNode(Src.getMachineOpcode(), DL,
2249 Node->getVTList(), Ops);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002250 } else {
2251 if (!TII->hasInstrModifiers(Opcode))
2252 return Node;
2253 int OperandIdx[] = {
2254 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2255 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2256 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
2257 };
2258 int NegIdx[] = {
2259 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
2260 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
2261 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
2262 };
2263 int AbsIdx[] = {
2264 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
2265 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
2266 -1
2267 };
2268 for (unsigned i = 0; i < 3; i++) {
2269 if (OperandIdx[i] < 0)
2270 return Node;
2271 SDValue &Src = Ops[OperandIdx[i] - 1];
2272 SDValue &Neg = Ops[NegIdx[i] - 1];
2273 SDValue FakeAbs;
2274 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2275 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2276 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002277 int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
2278 if (HasDst) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002279 SelIdx--;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002280 ImmIdx--;
2281 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002282 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002283 SDValue &Imm = Ops[ImmIdx];
2284 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002285 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2286 }
2287 }
2288
2289 return Node;
2290}