blob: e2604b2ed22e7a0a614560b766a24cbea8c765c8 [file] [log] [blame]
Tom Stellard75aadc22012-12-11 21:25:42 +00001//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Custom DAG lowering for R600
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000016#include "AMDGPUFrameLowering.h"
Matt Arsenaultc791f392014-06-23 18:00:31 +000017#include "AMDGPUIntrinsicInfo.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000018#include "AMDGPUSubtarget.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000019#include "R600Defines.h"
20#include "R600InstrInfo.h"
21#include "R600MachineFunctionInfo.h"
Tom Stellard067c8152014-07-21 14:01:14 +000022#include "llvm/Analysis/ValueTracking.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000023#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000024#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000025#include "llvm/CodeGen/MachineInstrBuilder.h"
26#include "llvm/CodeGen/MachineRegisterInfo.h"
27#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000028#include "llvm/IR/Argument.h"
29#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000030
31using namespace llvm;
32
/// Construct the R600 lowering object: registers the legal register classes,
/// then declares, for each (opcode, type) pair, how SelectionDAG legalization
/// must treat it on R600 (Legal / Expand / Custom / Promote) and which DAG
/// combines this target wants to run.
R600TargetLowering::R600TargetLowering(TargetMachine &TM,
                                       const AMDGPUSubtarget &STI)
    : AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) {
  // 32-bit scalars live in Reg32; 2-element vectors in Reg64; 4-element
  // vectors in Reg128.
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
  addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);

  computeRegisterProperties(STI.getRegisterInfo());

  // Set condition code actions.  Condition codes not listed here are
  // supported natively; the expanded ones are rewritten in terms of the
  // supported ones by legalization.
  setCondCodeAction(ISD::SETO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::f32, Expand);

  setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::i32, Expand);

  // Trig is custom-lowered (see LowerTrig).
  setOperationAction(ISD::FCOS, MVT::f32, Custom);
  setOperationAction(ISD::FSIN, MVT::f32, Custom);

  setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
  setOperationAction(ISD::SETCC, MVT::v2i32, Expand);

  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  // Plain SELECT is expanded (into SELECT_CC, which is custom above).
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
  setOperationAction(ISD::SELECT, MVT::v4i32, Expand);

  // Expand sign extension of vectors.  Scalar SIGN_EXTEND_INREG of i1/i8/i16
  // is only expanded when the subtarget lacks the BFE (bit-field extract)
  // instruction that would implement it directly.
  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);

  // Legalize loads and stores to the private address space.
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);

  // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
  // spaces, so it is custom lowered to handle those where it isn't.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
  }

  setOperationAction(ISD::STORE, MVT::i8, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);
  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
  setTruncStoreAction(MVT::i32, MVT::i16, Custom);

  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);

  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);

  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

  // Target-specific DAG combines (see PerformDAGCombine).
  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::FP_TO_SINT);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::SELECT_CC);
  setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);

  setOperationAction(ISD::SUB, MVT::i64, Expand);

  // These should be replaced by UDVIREM, but it does not happen automatically
  // during Type Legalization
  setOperationAction(ISD::UDIV, MVT::i64, Custom);
  setOperationAction(ISD::UREM, MVT::i64, Custom);
  setOperationAction(ISD::SDIV, MVT::i64, Custom);
  setOperationAction(ISD::SREM, MVT::i64, Custom);

  // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
  // to be Legal/Custom in order to avoid library calls.
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);

  // No carry/borrow-aware add/sub; expand to the generic forms.
  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
  for (MVT VT : ScalarIntVTs) {
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::SUBE, VT, Expand);
  }

  setSchedulingPreference(Sched::Source);
}
193
/// Expand pseudo machine instructions into real R600 instructions inside
/// \p BB.  Each handled case either builds replacement instruction(s) before
/// \p MI and falls through to erase the pseudo, or returns early to keep
/// \p MI untouched.  Unhandled opcodes are delegated to the AMDGPU base
/// implementation.
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(Subtarget->getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // Replace LDS_*_RET instruction that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI->getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
      // LDS_1A2D support and remove this special case.
      // If the result is used (or the op is LDS_CMPST_RET, which has no
      // NORET mapping), keep the RET form unchanged.
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()) ||
          MI->getOpcode() == AMDGPU::LDS_CMPST_RET)
        return BB;

      // Rebuild as the NORET form, copying all operands except the dead dst
      // (operand 0).
      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  // CLAMP/FABS/FNEG pseudos lower to a plain MOV carrying the corresponding
  // R600 instruction modifier flag.
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    // Instead of emitting anything, set the MASK flag on the instruction
    // that defines the masked register.
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    // Materialize the FP immediate via its raw 32-bit pattern.
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    // MOV from the constant file; the constant slot is selected through the
    // src0_sel immediate operand.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
                       MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    // If the write is immediately followed by RETURN, fold the end-of-program
    // bit into it.
    unsigned EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    // Sample-with-derivatives: load the H and V gradients into two temporary
    // texture registers, then issue the gradient sample that implicitly uses
    // them.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    // Remap coordinate sources / coordinate-type bits per texture target.
    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
        .addOperand(MI->getOperand(3))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
        .addOperand(MI->getOperand(2))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW)
        // Implicit uses keep the gradient setup alive and ordered before the
        // sample.
        .addReg(T0, RegState::Implicit)
        .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    // Same expansion as TXD, but emitting the shadow-compare sample
    // (TEX_SAMPLE_C_G) instead of TEX_SAMPLE_G.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    // Remap coordinate sources / coordinate-type bits per texture target.
    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
        .addOperand(MI->getOperand(3))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
        .addOperand(MI->getOperand(2))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW)
        .addReg(T0, RegState::Implicit)
        .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    // Unconditional branch -> plain JUMP.
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
        .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    // Compare the f32 condition against zero into the predicate bit, then
    // branch on it.  PUSH flag maintains the predicate stack.
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .addOperand(MI->getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    // Integer variant of the conditional branch expansion above.
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO_INT)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .addOperand(MI->getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if its not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    // Scan the rest of the block for a later export of the same type.
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // CF instruction encoding differs between Evergreen (84) and R600 (40).
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addOperand(MI->getOperand(2))
        .addOperand(MI->getOperand(3))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5))
        .addOperand(MI->getOperand(6))
        .addImm(CfInst)
        .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  // All fall-through cases replaced the pseudo; remove it now.
  MI->eraseFromParent();
  return BB;
}
573
574//===----------------------------------------------------------------------===//
575// Custom DAG Lowering Operations
576//===----------------------------------------------------------------------===//
577
Tom Stellard75aadc22012-12-11 21:25:42 +0000578SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000579 MachineFunction &MF = DAG.getMachineFunction();
580 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard75aadc22012-12-11 21:25:42 +0000581 switch (Op.getOpcode()) {
582 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Tom Stellard880a80a2014-06-17 16:53:14 +0000583 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
584 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
Jan Vesely25f36272014-06-18 12:27:13 +0000585 case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
Jan Veselyecf51332014-06-18 12:27:17 +0000586 case ISD::SRA_PARTS:
Jan Vesely900ff2e2014-06-18 12:27:15 +0000587 case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000588 case ISD::FCOS:
589 case ISD::FSIN: return LowerTrig(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000590 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000591 case ISD::STORE: return LowerSTORE(Op, DAG);
Matt Arsenaultd2c9e082014-07-07 18:34:45 +0000592 case ISD::LOAD: {
593 SDValue Result = LowerLOAD(Op, DAG);
594 assert((!Result.getNode() ||
595 Result.getNode()->getNumValues() == 2) &&
596 "Load should return a value and a chain");
597 return Result;
598 }
599
Matt Arsenault1d555c42014-06-23 18:00:55 +0000600 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000601 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000602 case ISD::INTRINSIC_VOID: {
603 SDValue Chain = Op.getOperand(0);
604 unsigned IntrinsicID =
605 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
606 switch (IntrinsicID) {
607 case AMDGPUIntrinsic::AMDGPU_store_output: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000608 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
609 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000610 MFI->LiveOuts.push_back(Reg);
Andrew Trickef9de2a2013-05-25 02:42:55 +0000611 return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
Tom Stellard75aadc22012-12-11 21:25:42 +0000612 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000613 case AMDGPUIntrinsic::R600_store_swizzle: {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000614 SDLoc DL(Op);
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000615 const SDValue Args[8] = {
616 Chain,
617 Op.getOperand(2), // Export Value
618 Op.getOperand(3), // ArrayBase
619 Op.getOperand(4), // Type
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000620 DAG.getConstant(0, DL, MVT::i32), // SWZ_X
621 DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
622 DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
623 DAG.getConstant(3, DL, MVT::i32) // SWZ_W
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000624 };
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000625 return DAG.getNode(AMDGPUISD::EXPORT, DL, Op.getValueType(), Args);
Tom Stellard75aadc22012-12-11 21:25:42 +0000626 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000627
Tom Stellard75aadc22012-12-11 21:25:42 +0000628 // default for switch(IntrinsicID)
629 default: break;
630 }
631 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
632 break;
633 }
634 case ISD::INTRINSIC_WO_CHAIN: {
635 unsigned IntrinsicID =
636 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
637 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000638 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000639 switch(IntrinsicID) {
640 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000641 case AMDGPUIntrinsic::R600_load_input: {
642 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
643 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
644 MachineFunction &MF = DAG.getMachineFunction();
645 MachineRegisterInfo &MRI = MF.getRegInfo();
646 MRI.addLiveIn(Reg);
647 return DAG.getCopyFromReg(DAG.getEntryNode(),
648 SDLoc(DAG.getEntryNode()), Reg, VT);
649 }
650
651 case AMDGPUIntrinsic::R600_interp_input: {
652 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
653 int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
654 MachineSDNode *interp;
655 if (ijb < 0) {
Eric Christopher7792e322015-01-30 23:24:40 +0000656 const R600InstrInfo *TII =
657 static_cast<const R600InstrInfo *>(Subtarget->getInstrInfo());
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000658 interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000659 MVT::v4f32, DAG.getTargetConstant(slot / 4, DL, MVT::i32));
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000660 return DAG.getTargetExtractSubreg(
661 TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
662 DL, MVT::f32, SDValue(interp, 0));
663 }
664 MachineFunction &MF = DAG.getMachineFunction();
665 MachineRegisterInfo &MRI = MF.getRegInfo();
666 unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
667 unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
668 MRI.addLiveIn(RegisterI);
669 MRI.addLiveIn(RegisterJ);
670 SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
671 SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
672 SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
673 SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);
674
675 if (slot % 4 < 2)
676 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000677 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4, DL, MVT::i32),
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000678 RegisterJNode, RegisterINode);
679 else
680 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000681 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4, DL, MVT::i32),
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000682 RegisterJNode, RegisterINode);
683 return SDValue(interp, slot % 2);
684 }
Vincent Lejeunef143af32013-11-11 22:10:24 +0000685 case AMDGPUIntrinsic::R600_interp_xy:
686 case AMDGPUIntrinsic::R600_interp_zw: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000687 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Tom Stellard41afe6a2013-02-05 17:09:14 +0000688 MachineSDNode *interp;
Vincent Lejeunef143af32013-11-11 22:10:24 +0000689 SDValue RegisterINode = Op.getOperand(2);
690 SDValue RegisterJNode = Op.getOperand(3);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000691
Vincent Lejeunef143af32013-11-11 22:10:24 +0000692 if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
Tom Stellard41afe6a2013-02-05 17:09:14 +0000693 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000694 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000695 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000696 else
697 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000698 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000699 RegisterJNode, RegisterINode);
Vincent Lejeunef143af32013-11-11 22:10:24 +0000700 return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
701 SDValue(interp, 0), SDValue(interp, 1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000702 }
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000703 case AMDGPUIntrinsic::R600_tex:
704 case AMDGPUIntrinsic::R600_texc:
705 case AMDGPUIntrinsic::R600_txl:
706 case AMDGPUIntrinsic::R600_txlc:
707 case AMDGPUIntrinsic::R600_txb:
708 case AMDGPUIntrinsic::R600_txbc:
709 case AMDGPUIntrinsic::R600_txf:
710 case AMDGPUIntrinsic::R600_txq:
711 case AMDGPUIntrinsic::R600_ddx:
Vincent Lejeune6df39432013-10-02 16:00:33 +0000712 case AMDGPUIntrinsic::R600_ddy:
713 case AMDGPUIntrinsic::R600_ldptr: {
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000714 unsigned TextureOp;
715 switch (IntrinsicID) {
716 case AMDGPUIntrinsic::R600_tex:
717 TextureOp = 0;
718 break;
719 case AMDGPUIntrinsic::R600_texc:
720 TextureOp = 1;
721 break;
722 case AMDGPUIntrinsic::R600_txl:
723 TextureOp = 2;
724 break;
725 case AMDGPUIntrinsic::R600_txlc:
726 TextureOp = 3;
727 break;
728 case AMDGPUIntrinsic::R600_txb:
729 TextureOp = 4;
730 break;
731 case AMDGPUIntrinsic::R600_txbc:
732 TextureOp = 5;
733 break;
734 case AMDGPUIntrinsic::R600_txf:
735 TextureOp = 6;
736 break;
737 case AMDGPUIntrinsic::R600_txq:
738 TextureOp = 7;
739 break;
740 case AMDGPUIntrinsic::R600_ddx:
741 TextureOp = 8;
742 break;
743 case AMDGPUIntrinsic::R600_ddy:
744 TextureOp = 9;
745 break;
Vincent Lejeune6df39432013-10-02 16:00:33 +0000746 case AMDGPUIntrinsic::R600_ldptr:
747 TextureOp = 10;
748 break;
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000749 default:
750 llvm_unreachable("Unknow Texture Operation");
751 }
752
753 SDValue TexArgs[19] = {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000754 DAG.getConstant(TextureOp, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000755 Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000756 DAG.getConstant(0, DL, MVT::i32),
757 DAG.getConstant(1, DL, MVT::i32),
758 DAG.getConstant(2, DL, MVT::i32),
759 DAG.getConstant(3, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000760 Op.getOperand(2),
761 Op.getOperand(3),
762 Op.getOperand(4),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000763 DAG.getConstant(0, DL, MVT::i32),
764 DAG.getConstant(1, DL, MVT::i32),
765 DAG.getConstant(2, DL, MVT::i32),
766 DAG.getConstant(3, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000767 Op.getOperand(5),
768 Op.getOperand(6),
769 Op.getOperand(7),
770 Op.getOperand(8),
771 Op.getOperand(9),
772 Op.getOperand(10)
773 };
Craig Topper48d114b2014-04-26 18:35:24 +0000774 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000775 }
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000776 case AMDGPUIntrinsic::AMDGPU_dp4: {
777 SDValue Args[8] = {
778 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000779 DAG.getConstant(0, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000780 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000781 DAG.getConstant(0, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000782 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000783 DAG.getConstant(1, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000784 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000785 DAG.getConstant(1, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000786 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000787 DAG.getConstant(2, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000788 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000789 DAG.getConstant(2, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000790 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000791 DAG.getConstant(3, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000792 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000793 DAG.getConstant(3, DL, MVT::i32))
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000794 };
Craig Topper48d114b2014-04-26 18:35:24 +0000795 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000796 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000797
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000798 case Intrinsic::r600_read_ngroups_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000799 return LowerImplicitParameter(DAG, VT, DL, 0);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000800 case Intrinsic::r600_read_ngroups_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000801 return LowerImplicitParameter(DAG, VT, DL, 1);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000802 case Intrinsic::r600_read_ngroups_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000803 return LowerImplicitParameter(DAG, VT, DL, 2);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000804 case Intrinsic::r600_read_global_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000805 return LowerImplicitParameter(DAG, VT, DL, 3);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000806 case Intrinsic::r600_read_global_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000807 return LowerImplicitParameter(DAG, VT, DL, 4);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000808 case Intrinsic::r600_read_global_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000809 return LowerImplicitParameter(DAG, VT, DL, 5);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000810 case Intrinsic::r600_read_local_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000811 return LowerImplicitParameter(DAG, VT, DL, 6);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000812 case Intrinsic::r600_read_local_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000813 return LowerImplicitParameter(DAG, VT, DL, 7);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000814 case Intrinsic::r600_read_local_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000815 return LowerImplicitParameter(DAG, VT, DL, 8);
816
Jan Veselye5121f32014-10-14 20:05:26 +0000817 case Intrinsic::AMDGPU_read_workdim:
818 return LowerImplicitParameter(DAG, VT, DL, MFI->ABIArgOffset / 4);
819
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000820 case Intrinsic::r600_read_tgid_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000821 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
822 AMDGPU::T1_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000823 case Intrinsic::r600_read_tgid_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000824 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
825 AMDGPU::T1_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000826 case Intrinsic::r600_read_tgid_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000827 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
828 AMDGPU::T1_Z, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000829 case Intrinsic::r600_read_tidig_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000830 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
831 AMDGPU::T0_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000832 case Intrinsic::r600_read_tidig_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000833 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
834 AMDGPU::T0_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000835 case Intrinsic::r600_read_tidig_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000836 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
837 AMDGPU::T0_Z, VT);
Matt Arsenault257d48d2014-06-24 22:13:39 +0000838 case Intrinsic::AMDGPU_rsq:
839 // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
840 return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
Marek Olsak43650e42015-03-24 13:40:08 +0000841
842 case AMDGPUIntrinsic::AMDGPU_fract:
843 case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
844 return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000845 }
846 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
847 break;
848 }
849 } // end switch(Op.getOpcode())
850 return SDValue();
851}
852
853void R600TargetLowering::ReplaceNodeResults(SDNode *N,
854 SmallVectorImpl<SDValue> &Results,
855 SelectionDAG &DAG) const {
856 switch (N->getOpcode()) {
Matt Arsenaultd125d742014-03-27 17:23:24 +0000857 default:
858 AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
859 return;
Jan Vesely2cb62ce2014-07-10 22:40:21 +0000860 case ISD::FP_TO_UINT:
861 if (N->getValueType(0) == MVT::i1) {
862 Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
863 return;
864 }
865 // Fall-through. Since we don't care about out of bounds values
866 // we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint
867 // considers some extra cases which are not necessary here.
868 case ISD::FP_TO_SINT: {
869 SDValue Result;
870 if (expandFP_TO_SINT(N, Result, DAG))
871 Results.push_back(Result);
Tom Stellard365366f2013-01-23 02:09:06 +0000872 return;
Jan Vesely2cb62ce2014-07-10 22:40:21 +0000873 }
Jan Vesely343cd6f02014-06-22 21:43:01 +0000874 case ISD::UDIV: {
875 SDValue Op = SDValue(N, 0);
876 SDLoc DL(Op);
877 EVT VT = Op.getValueType();
878 SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT),
879 N->getOperand(0), N->getOperand(1));
880 Results.push_back(UDIVREM);
881 break;
882 }
883 case ISD::UREM: {
884 SDValue Op = SDValue(N, 0);
885 SDLoc DL(Op);
886 EVT VT = Op.getValueType();
887 SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT),
888 N->getOperand(0), N->getOperand(1));
889 Results.push_back(UDIVREM.getValue(1));
890 break;
891 }
892 case ISD::SDIV: {
893 SDValue Op = SDValue(N, 0);
894 SDLoc DL(Op);
895 EVT VT = Op.getValueType();
896 SDValue SDIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(VT, VT),
897 N->getOperand(0), N->getOperand(1));
898 Results.push_back(SDIVREM);
899 break;
900 }
901 case ISD::SREM: {
902 SDValue Op = SDValue(N, 0);
903 SDLoc DL(Op);
904 EVT VT = Op.getValueType();
905 SDValue SDIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(VT, VT),
906 N->getOperand(0), N->getOperand(1));
907 Results.push_back(SDIVREM.getValue(1));
908 break;
909 }
910 case ISD::SDIVREM: {
911 SDValue Op = SDValue(N, 1);
912 SDValue RES = LowerSDIVREM(Op, DAG);
913 Results.push_back(RES);
914 Results.push_back(RES.getValue(1));
915 break;
916 }
917 case ISD::UDIVREM: {
918 SDValue Op = SDValue(N, 0);
Tom Stellardbf69d762014-11-15 01:07:53 +0000919 LowerUDIVREM64(Op, DAG, Results);
Jan Vesely343cd6f02014-06-22 21:43:01 +0000920 break;
921 }
922 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000923}
924
Tom Stellard880a80a2014-06-17 16:53:14 +0000925SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
926 SDValue Vector) const {
927
928 SDLoc DL(Vector);
929 EVT VecVT = Vector.getValueType();
930 EVT EltVT = VecVT.getVectorElementType();
931 SmallVector<SDValue, 8> Args;
932
933 for (unsigned i = 0, e = VecVT.getVectorNumElements();
934 i != e; ++i) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000935 Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
936 DAG.getConstant(i, DL, getVectorIdxTy())));
Tom Stellard880a80a2014-06-17 16:53:14 +0000937 }
938
939 return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
940}
941
942SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
943 SelectionDAG &DAG) const {
944
945 SDLoc DL(Op);
946 SDValue Vector = Op.getOperand(0);
947 SDValue Index = Op.getOperand(1);
948
949 if (isa<ConstantSDNode>(Index) ||
950 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
951 return Op;
952
953 Vector = vectorToVerticalVector(DAG, Vector);
954 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
955 Vector, Index);
956}
957
958SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
959 SelectionDAG &DAG) const {
960 SDLoc DL(Op);
961 SDValue Vector = Op.getOperand(0);
962 SDValue Value = Op.getOperand(1);
963 SDValue Index = Op.getOperand(2);
964
965 if (isa<ConstantSDNode>(Index) ||
966 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
967 return Op;
968
969 Vector = vectorToVerticalVector(DAG, Vector);
970 SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
971 Vector, Value, Index);
972 return vectorToVerticalVector(DAG, Insert);
973}
974
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000975SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
976 // On hw >= R700, COS/SIN input must be between -1. and 1.
977 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
978 EVT VT = Op.getValueType();
979 SDValue Arg = Op.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000980 SDLoc DL(Op);
981 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
982 DAG.getNode(ISD::FADD, DL, VT,
983 DAG.getNode(ISD::FMUL, DL, VT, Arg,
984 DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
985 DAG.getConstantFP(0.5, DL, MVT::f32)));
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000986 unsigned TrigNode;
987 switch (Op.getOpcode()) {
988 case ISD::FCOS:
989 TrigNode = AMDGPUISD::COS_HW;
990 break;
991 case ISD::FSIN:
992 TrigNode = AMDGPUISD::SIN_HW;
993 break;
994 default:
995 llvm_unreachable("Wrong trig opcode");
996 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000997 SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
998 DAG.getNode(ISD::FADD, DL, VT, FractPart,
999 DAG.getConstantFP(-0.5, DL, MVT::f32)));
Vincent Lejeuneb55940c2013-07-09 15:03:11 +00001000 if (Gen >= AMDGPUSubtarget::R700)
1001 return TrigVal;
1002 // On R600 hw, COS/SIN input must be between -Pi and Pi.
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001003 return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
1004 DAG.getConstantFP(3.14159265359, DL, MVT::f32));
Vincent Lejeuneb55940c2013-07-09 15:03:11 +00001005}
1006
Jan Vesely25f36272014-06-18 12:27:13 +00001007SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
1008 SDLoc DL(Op);
1009 EVT VT = Op.getValueType();
1010
1011 SDValue Lo = Op.getOperand(0);
1012 SDValue Hi = Op.getOperand(1);
1013 SDValue Shift = Op.getOperand(2);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001014 SDValue Zero = DAG.getConstant(0, DL, VT);
1015 SDValue One = DAG.getConstant(1, DL, VT);
Jan Vesely25f36272014-06-18 12:27:13 +00001016
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001017 SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
1018 SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
Jan Vesely25f36272014-06-18 12:27:13 +00001019 SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
1020 SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
1021
1022 // The dance around Width1 is necessary for 0 special case.
1023 // Without it the CompShift might be 32, producing incorrect results in
1024 // Overflow. So we do the shift in two steps, the alternative is to
1025 // add a conditional to filter the special case.
1026
1027 SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
1028 Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);
1029
1030 SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
1031 HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
1032 SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);
1033
1034 SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
1035 SDValue LoBig = Zero;
1036
1037 Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
1038 Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
1039
1040 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
1041}
1042
Jan Vesely900ff2e2014-06-18 12:27:15 +00001043SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
1044 SDLoc DL(Op);
1045 EVT VT = Op.getValueType();
1046
1047 SDValue Lo = Op.getOperand(0);
1048 SDValue Hi = Op.getOperand(1);
1049 SDValue Shift = Op.getOperand(2);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001050 SDValue Zero = DAG.getConstant(0, DL, VT);
1051 SDValue One = DAG.getConstant(1, DL, VT);
Jan Vesely900ff2e2014-06-18 12:27:15 +00001052
Jan Veselyecf51332014-06-18 12:27:17 +00001053 const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;
1054
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001055 SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
1056 SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
Jan Vesely900ff2e2014-06-18 12:27:15 +00001057 SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
1058 SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
1059
1060 // The dance around Width1 is necessary for 0 special case.
1061 // Without it the CompShift might be 32, producing incorrect results in
1062 // Overflow. So we do the shift in two steps, the alternative is to
1063 // add a conditional to filter the special case.
1064
1065 SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
1066 Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);
1067
Jan Veselyecf51332014-06-18 12:27:17 +00001068 SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
Jan Vesely900ff2e2014-06-18 12:27:15 +00001069 SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
1070 LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);
1071
Jan Veselyecf51332014-06-18 12:27:17 +00001072 SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
1073 SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;
Jan Vesely900ff2e2014-06-18 12:27:15 +00001074
1075 Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
1076 Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
1077
1078 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
1079}
1080
Tom Stellard75aadc22012-12-11 21:25:42 +00001081SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001082 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001083 return DAG.getNode(
1084 ISD::SETCC,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001085 DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001086 MVT::i1,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001087 Op, DAG.getConstantFP(0.0f, DL, MVT::f32),
Tom Stellard75aadc22012-12-11 21:25:42 +00001088 DAG.getCondCode(ISD::SETNE)
1089 );
1090}
1091
Tom Stellard75aadc22012-12-11 21:25:42 +00001092SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001093 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001094 unsigned DwordOffset) const {
1095 unsigned ByteOffset = DwordOffset * 4;
1096 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +00001097 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +00001098
1099 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
1100 assert(isInt<16>(ByteOffset));
1101
1102 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001103 DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
Tom Stellard75aadc22012-12-11 21:25:42 +00001104 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
1105 false, false, false, 0);
1106}
1107
Tom Stellard75aadc22012-12-11 21:25:42 +00001108bool R600TargetLowering::isZero(SDValue Op) const {
1109 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
1110 return Cst->isNullValue();
1111 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
1112 return CstFP->isZero();
1113 } else {
1114 return false;
1115 }
1116}
1117
// Lower SELECT_CC into a form matchable by the hardware SET* / CND*
// instructions, or — when neither pattern applies — into a pair of natively
// supported SELECT_CC operations.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // f32 selects may fold directly into a legacy min/max node.
  if (VT == MVT::f32) {
    DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
    SDValue MinMax = CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
    if (MinMax)
      return MinMax;
  }

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  // If True/False are reversed relative to the hardware convention, invert
  // the condition (and, if the inverse is not legal, additionally swap the
  // compare operands) so the select matches a SET* pattern.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
     ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the conditon and then swapping the operands
      // (inverting also requires swapping True/False to preserve semantics).
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types.  This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    // CND* has no "not equal" form: rewrite x != 0 as x == 0 with the
    // select arms exchanged.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    // Undo the bitcast (if any) so the result has the original type.
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, DL, CompareVT);
    HWFalse = DAG.getConstant(0, DL, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations: first materialize the comparison result as a
  // hardware true/false value, then select on it being non-zero.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
1259
Alp Tokercb402912014-01-24 17:20:08 +00001260/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001261/// convert these pointers to a register index. Each register holds
1262/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
1263/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
1264/// for indirect addressing.
1265SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1266 unsigned StackWidth,
1267 SelectionDAG &DAG) const {
1268 unsigned SRLPad;
1269 switch(StackWidth) {
1270 case 1:
1271 SRLPad = 2;
1272 break;
1273 case 2:
1274 SRLPad = 3;
1275 break;
1276 case 4:
1277 SRLPad = 4;
1278 break;
1279 default: llvm_unreachable("Invalid stack width");
1280 }
1281
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001282 SDLoc DL(Ptr);
1283 return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
1284 DAG.getConstant(SRLPad, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001285}
1286
1287void R600TargetLowering::getStackAddress(unsigned StackWidth,
1288 unsigned ElemIdx,
1289 unsigned &Channel,
1290 unsigned &PtrIncr) const {
1291 switch (StackWidth) {
1292 default:
1293 case 1:
1294 Channel = 0;
1295 if (ElemIdx > 0) {
1296 PtrIncr = 1;
1297 } else {
1298 PtrIncr = 0;
1299 }
1300 break;
1301 case 2:
1302 Channel = ElemIdx % 2;
1303 if (ElemIdx == 2) {
1304 PtrIncr = 1;
1305 } else {
1306 PtrIncr = 0;
1307 }
1308 break;
1309 case 4:
1310 Channel = ElemIdx;
1311 PtrIncr = 0;
1312 break;
1313 }
1314}
1315
Tom Stellard75aadc22012-12-11 21:25:42 +00001316SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001317 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001318 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1319 SDValue Chain = Op.getOperand(0);
1320 SDValue Value = Op.getOperand(1);
1321 SDValue Ptr = Op.getOperand(2);
1322
Tom Stellard2ffc3302013-08-26 15:05:44 +00001323 SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
Tom Stellardfbab8272013-08-16 01:12:11 +00001324 if (Result.getNode()) {
1325 return Result;
1326 }
1327
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001328 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
1329 if (StoreNode->isTruncatingStore()) {
1330 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001331 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001332 EVT MemVT = StoreNode->getMemoryVT();
1333 SDValue MaskConstant;
1334 if (MemVT == MVT::i8) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001335 MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001336 } else {
1337 assert(MemVT == MVT::i16);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001338 MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001339 }
1340 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001341 DAG.getConstant(2, DL, MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001342 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001343 DAG.getConstant(0x00000003, DL, VT));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001344 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1345 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001346 DAG.getConstant(3, DL, VT));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001347 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1348 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
1349 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1350 // vector instead.
1351 SDValue Src[4] = {
1352 ShiftedValue,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001353 DAG.getConstant(0, DL, MVT::i32),
1354 DAG.getConstant(0, DL, MVT::i32),
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001355 Mask
1356 };
Craig Topper48d114b2014-04-26 18:35:24 +00001357 SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001358 SDValue Args[3] = { Chain, Input, DWordAddr };
1359 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
Craig Topper206fcd42014-04-26 19:29:41 +00001360 Op->getVTList(), Args, MemVT,
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001361 StoreNode->getMemOperand());
1362 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
1363 Value.getValueType().bitsGE(MVT::i32)) {
1364 // Convert pointer from byte address to dword address.
1365 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1366 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001367 Ptr, DAG.getConstant(2, DL, MVT::i32)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001368
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001369 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
Matt Arsenaulteaa3a7e2013-12-10 21:37:42 +00001370 llvm_unreachable("Truncated and indexed stores not supported yet");
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001371 } else {
1372 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1373 }
1374 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001375 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001376 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001377
1378 EVT ValueVT = Value.getValueType();
1379
1380 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1381 return SDValue();
1382 }
1383
Tom Stellarde9373602014-01-22 19:24:14 +00001384 SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
1385 if (Ret.getNode()) {
1386 return Ret;
1387 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001388 // Lowering for indirect addressing
1389
1390 const MachineFunction &MF = DAG.getMachineFunction();
Eric Christopher7792e322015-01-30 23:24:40 +00001391 const AMDGPUFrameLowering *TFL =
1392 static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001393 unsigned StackWidth = TFL->getStackWidth(MF);
1394
1395 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1396
1397 if (ValueVT.isVector()) {
1398 unsigned NumElemVT = ValueVT.getVectorNumElements();
1399 EVT ElemVT = ValueVT.getVectorElementType();
Craig Topper48d114b2014-04-26 18:35:24 +00001400 SmallVector<SDValue, 4> Stores(NumElemVT);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001401
1402 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1403 "vector width in load");
1404
1405 for (unsigned i = 0; i < NumElemVT; ++i) {
1406 unsigned Channel, PtrIncr;
1407 getStackAddress(StackWidth, i, Channel, PtrIncr);
1408 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001409 DAG.getConstant(PtrIncr, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001410 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001411 Value, DAG.getConstant(i, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001412
1413 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1414 Chain, Elem, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001415 DAG.getTargetConstant(Channel, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001416 }
Craig Topper48d114b2014-04-26 18:35:24 +00001417 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001418 } else {
1419 if (ValueVT == MVT::i8) {
1420 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1421 }
1422 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001423 DAG.getTargetConstant(0, DL, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001424 }
1425
1426 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001427}
1428
// return (512 + (kc_bank << 12)
//
// Map a constant-buffer address space to the base dword index of its
// kilo-constant (kc) bank in the constant file. Bank 0 starts at 512 and
// each subsequent bank is 4096 dwords further on (i.e. kc_bank << 12).
// Returns -1 for any address space that is not a constant buffer.
static int
ConstantAddressBlock(unsigned AddressSpace) {
  switch (AddressSpace) {
  case AMDGPUAS::CONSTANT_BUFFER_0:
    return 512;
  case AMDGPUAS::CONSTANT_BUFFER_1:
    return 512 + 4096;
  case AMDGPUAS::CONSTANT_BUFFER_2:
    return 512 + 4096 * 2;
  case AMDGPUAS::CONSTANT_BUFFER_3:
    return 512 + 4096 * 3;
  case AMDGPUAS::CONSTANT_BUFFER_4:
    return 512 + 4096 * 4;
  case AMDGPUAS::CONSTANT_BUFFER_5:
    return 512 + 4096 * 5;
  case AMDGPUAS::CONSTANT_BUFFER_6:
    return 512 + 4096 * 6;
  case AMDGPUAS::CONSTANT_BUFFER_7:
    return 512 + 4096 * 7;
  case AMDGPUAS::CONSTANT_BUFFER_8:
    return 512 + 4096 * 8;
  case AMDGPUAS::CONSTANT_BUFFER_9:
    return 512 + 4096 * 9;
  case AMDGPUAS::CONSTANT_BUFFER_10:
    return 512 + 4096 * 10;
  case AMDGPUAS::CONSTANT_BUFFER_11:
    return 512 + 4096 * 11;
  case AMDGPUAS::CONSTANT_BUFFER_12:
    return 512 + 4096 * 12;
  case AMDGPUAS::CONSTANT_BUFFER_13:
    return 512 + 4096 * 13;
  case AMDGPUAS::CONSTANT_BUFFER_14:
    return 512 + 4096 * 14;
  case AMDGPUAS::CONSTANT_BUFFER_15:
    return 512 + 4096 * 15;
  default:
    return -1;
  }
}
1469
// Custom lowering for ISD::LOAD. Tries, in order:
//  1. the generic AMDGPU lowering,
//  2. constant-address-space loads of global variables (become REGISTER_LOAD
//     through the private address space),
//  3. scalarization of vector loads from local memory,
//  4. constant-buffer loads (become CONST_ADDRESS nodes),
//  5. manual expansion of SEXT loads (shl + sra of an any-extending load),
//  6. private (indirect/stack) addressing via REGISTER_LOAD.
// Anything else returns SDValue() and is handled by the legalizer.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  // Let the common AMDGPU lowering have the first try.
  SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
  if (Ret.getNode()) {
    SDValue Ops[2] = {
      Ret,
      Chain
    };
    return DAG.getMergeValues(Ops, DL);
  }

  // Lower loads constant address space global variable loads
  if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
      isa<GlobalVariable>(GetUnderlyingObject(
          LoadNode->getMemOperand()->getValue(), *getDataLayout()))) {

    // Convert the byte pointer to a dword register index (divide by 4).
    SDValue Ptr = DAG.getZExtOrTrunc(LoadNode->getBasePtr(), DL,
        getPointerTy(AMDGPUAS::PRIVATE_ADDRESS));
    Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
        DAG.getConstant(2, DL, MVT::i32));
    return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op->getVTList(),
                       LoadNode->getChain(), Ptr,
                       DAG.getTargetConstant(0, DL, MVT::i32),
                       Op.getOperand(2));
  }

  // Vector loads from local memory are split into per-element loads.
  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      ScalarizeVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Constant-buffer loads: only non-extending and zero-extending loads can be
  // served directly from the constant file.
  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
        isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT,
                           makeArrayRef(Slots, NumElements));
    } else {
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
                      DAG.getConstant(4, DL, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
          );
    }

    // A scalar load only wants element 0 of the 4-dword constant slot.
    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, DL, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    // Sign-extend manually: shift the loaded value to the top of the register,
    // then arithmetic-shift back down.
    SDValue ShiftAmount =
        DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), DL,
                        MVT::i32);
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                     LoadNode->getPointerInfo(), MemVT,
                                     LoadNode->isVolatile(),
                                     LoadNode->isNonTemporal(),
                                     LoadNode->isInvariant(),
                                     LoadNode->getAlignment());
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);

    SDValue MergedValues[2] = { Sra, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Everything below handles only the private (stack) address space.
  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL =
      static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    // Emit one REGISTER_LOAD per element; getStackAddress maps the element
    // index to a channel and a register-index increment.
    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, DL, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, DL, MVT::i32),
                             Op.getOperand(2));
    }
    // Pad to a 4-element vector with undefs.
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, DL, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2] = {
    LoweredLoad,
    Chain
  };

  return DAG.getMergeValues(Ops, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001635
Matt Arsenault1d555c42014-06-23 18:00:55 +00001636SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1637 SDValue Chain = Op.getOperand(0);
1638 SDValue Cond = Op.getOperand(1);
1639 SDValue Jump = Op.getOperand(2);
1640
1641 return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1642 Chain, Jump, Cond);
1643}
1644
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
///
/// For compute shaders, each argument is materialized as a load from the
/// kernel input buffer in CONSTANT_BUFFER_0 (arguments start at byte 36).
/// For non-compute shaders, arguments arrive in live-in vector registers.
SDValue R600TargetLowering::LowerFormalArguments(
    SDValue Chain,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    SDLoc DL, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();

  SmallVector<ISD::InputArg, 8> LocalIns;

  // Recover the original (pre-legalization) argument types before assigning
  // locations, so memory layout matches the source-level signature.
  getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);

  AnalyzeFormalArguments(CCInfo, LocalIns);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    const ISD::InputArg &In = Ins[i];
    EVT VT = In.VT;
    EVT MemVT = VA.getLocVT();
    if (!VT.isVector() && MemVT.isVector()) {
      // Get load source type if scalarized.
      MemVT = MemVT.getVectorElementType();
    }

    // Graphics shaders: arguments are passed in registers, not memory.
    if (MFI->getShaderType() != ShaderType::COMPUTE) {
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
    // being invalid. Somehow this seems to work with i64 arguments, but breaks
    // for <1 x i64>.

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.
    ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
    if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
      // FIXME: This should really check the extload type, but the handling of
      // extload vector parameters seems to be broken.

      // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
      Ext = ISD::SEXTLOAD;
    }

    // Compute the offset from the value.
    // XXX - I think PartOffset should give you this, but it seems to give the
    // size of the register which isn't useful.

    unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
    unsigned PartOffset = VA.getLocMemOffset();
    // +36 skips the implicit dispatch info at the start of the input buffer.
    unsigned Offset = 36 + VA.getLocMemOffset();

    MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
    SDValue Arg = DAG.getLoad(ISD::UNINDEXED, Ext, VT, DL, Chain,
                              DAG.getConstant(Offset, DL, MVT::i32),
                              DAG.getUNDEF(MVT::i32),
                              PtrInfo,
                              MemVT, false, true, true, 4);

    // 4 is the preferred alignment for the CONSTANT memory space.
    InVals.push_back(Arg);
    // Record where the next ABI argument would start, for runtime metadata.
    MFI->ABIArgOffset = Offset + MemVT.getStoreSize();
  }
  return Chain;
}
1724
Matt Arsenault758659232013-05-18 00:21:46 +00001725EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001726 if (!VT.isVector())
1727 return MVT::i32;
Tom Stellard75aadc22012-12-11 21:25:42 +00001728 return VT.changeVectorElementTypeToInteger();
1729}
1730
Matt Arsenault209a7b92014-04-18 07:40:20 +00001731static SDValue CompactSwizzlableVector(
1732 SelectionDAG &DAG, SDValue VectorEntry,
1733 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001734 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1735 assert(RemapSwizzle.empty());
1736 SDValue NewBldVec[4] = {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001737 VectorEntry.getOperand(0),
1738 VectorEntry.getOperand(1),
1739 VectorEntry.getOperand(2),
1740 VectorEntry.getOperand(3)
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001741 };
1742
1743 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeunefa58a5f2013-10-13 17:56:10 +00001744 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1745 // We mask write here to teach later passes that the ith element of this
1746 // vector is undef. Thus we can use it to reduce 128 bits reg usage,
1747 // break false dependencies and additionnaly make assembly easier to read.
1748 RemapSwizzle[i] = 7; // SEL_MASK_WRITE
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001749 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1750 if (C->isZero()) {
1751 RemapSwizzle[i] = 4; // SEL_0
1752 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1753 } else if (C->isExactlyValue(1.0)) {
1754 RemapSwizzle[i] = 5; // SEL_1
1755 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1756 }
1757 }
1758
1759 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1760 continue;
1761 for (unsigned j = 0; j < i; j++) {
1762 if (NewBldVec[i] == NewBldVec[j]) {
1763 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1764 RemapSwizzle[i] = j;
1765 break;
1766 }
1767 }
1768 }
1769
1770 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001771 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001772}
1773
Benjamin Kramer193960c2013-06-11 13:32:25 +00001774static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1775 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001776 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1777 assert(RemapSwizzle.empty());
1778 SDValue NewBldVec[4] = {
1779 VectorEntry.getOperand(0),
1780 VectorEntry.getOperand(1),
1781 VectorEntry.getOperand(2),
1782 VectorEntry.getOperand(3)
1783 };
1784 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001785 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001786 RemapSwizzle[i] = i;
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001787 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1788 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1789 ->getZExtValue();
1790 if (i == Idx)
1791 isUnmovable[Idx] = true;
1792 }
1793 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001794
1795 for (unsigned i = 0; i < 4; i++) {
1796 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1797 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1798 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001799 if (isUnmovable[Idx])
1800 continue;
1801 // Swap i and Idx
1802 std::swap(NewBldVec[Idx], NewBldVec[i]);
1803 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1804 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001805 }
1806 }
1807
1808 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001809 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001810}
1811
1812
1813SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001814 SDValue Swz[4], SelectionDAG &DAG,
1815 SDLoc DL) const {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001816 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1817 // Old -> New swizzle values
1818 DenseMap<unsigned, unsigned> SwizzleRemap;
1819
1820 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1821 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001822 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001823 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001824 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001825 }
1826
1827 SwizzleRemap.clear();
1828 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1829 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001830 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001831 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001832 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001833 }
1834
1835 return BuildVector;
1836}
1837
1838
Tom Stellard75aadc22012-12-11 21:25:42 +00001839//===----------------------------------------------------------------------===//
1840// Custom DAG Optimizations
1841//===----------------------------------------------------------------------===//
1842
/// Target-specific DAG combines for R600. Handles fp_round-of-uint_to_fp,
/// the Mesa SET*_DX10 select pattern, insert/extract on BUILD_VECTORs,
/// nested SELECT_CC folding, and swizzle optimization on EXPORT and
/// TEXTURE_FETCH nodes; everything else defers to the AMDGPU combines.
SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
  case ISD::FP_ROUND: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
      return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
                         Arg.getOperand(0));
    }
    break;
  }

  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
  // (i32 select_cc f32, f32, -1, 0 cc)
  //
  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
  // this to one of the SET*_DX10 instructions.
  case ISD::FP_TO_SINT: {
    SDValue FNeg = N->getOperand(0);
    if (FNeg.getOpcode() != ISD::FNEG) {
      return SDValue();
    }
    SDValue SelectCC = FNeg.getOperand(0);
    if (SelectCC.getOpcode() != ISD::SELECT_CC ||
        SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
        SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
        !isHWTrueValue(SelectCC.getOperand(2)) ||
        !isHWFalseValue(SelectCC.getOperand(3))) {
      return SDValue();
    }

    SDLoc dl(N);
    return DAG.getNode(ISD::SELECT_CC, dl, N->getValueType(0),
                       SelectCC.getOperand(0), // LHS
                       SelectCC.getOperand(1), // RHS
                       DAG.getConstant(-1, dl, MVT::i32), // True
                       DAG.getConstant(0, dl, MVT::i32), // False
                       SelectCC.getOperand(4)); // CC

    break;
  }

  // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
  // => build_vector elt0, ... , NewEltIdx, ... , eltN
  case ISD::INSERT_VECTOR_ELT: {
    SDValue InVec = N->getOperand(0);
    SDValue InVal = N->getOperand(1);
    SDValue EltNo = N->getOperand(2);
    SDLoc dl(N);

    // If the inserted element is an UNDEF, just use the input vector.
    if (InVal.getOpcode() == ISD::UNDEF)
      return InVec;

    EVT VT = InVec.getValueType();

    // If we can't generate a legal BUILD_VECTOR, exit
    if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
      return SDValue();

    // Check that we know which element is being inserted
    if (!isa<ConstantSDNode>(EltNo))
      return SDValue();
    unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

    // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
    // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
    // vector elements.
    SmallVector<SDValue, 8> Ops;
    if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
      Ops.append(InVec.getNode()->op_begin(),
                 InVec.getNode()->op_end());
    } else if (InVec.getOpcode() == ISD::UNDEF) {
      unsigned NElts = VT.getVectorNumElements();
      Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
    } else {
      return SDValue();
    }

    // Insert the element
    if (Elt < Ops.size()) {
      // All the operands of BUILD_VECTOR must have the same type;
      // we enforce that here.
      EVT OpVT = Ops[0].getValueType();
      if (InVal.getValueType() != OpVT)
        InVal = OpVT.bitsGT(InVal.getValueType()) ?
          DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
          DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
      Ops[Elt] = InVal;
    }

    // Return the new vector
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
  }

  // Extract_vec (Build_vector) generated by custom lowering
  // also needs to be customly combined
  case ISD::EXTRACT_VECTOR_ELT: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return Arg->getOperand(Element);
      }
    }
    if (Arg.getOpcode() == ISD::BITCAST &&
        Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
                           Arg->getOperand(0).getOperand(Element));
      }
    }
    // NOTE(review): there is no break here, so when neither pattern matches,
    // control falls through into the SELECT_CC case below and treats this
    // EXTRACT_VECTOR_ELT node's operands as SELECT_CC operands. This looks
    // unintended (later LLVM revisions add a break) -- confirm before relying
    // on it.
  }

  case ISD::SELECT_CC: {
    // Try common optimizations
    SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
    if (Ret.getNode())
      return Ret;

    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
    // selectcc x, y, a, b, inv(cc)
    //
    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
    // selectcc x, y, a, b, cc
    SDValue LHS = N->getOperand(0);
    if (LHS.getOpcode() != ISD::SELECT_CC) {
      return SDValue();
    }

    SDValue RHS = N->getOperand(1);
    SDValue True = N->getOperand(2);
    SDValue False = N->getOperand(3);
    ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();

    // The fold only applies when the outer select's true/false values match
    // the inner select's, and the outer RHS is the inner false value.
    if (LHS.getOperand(2).getNode() != True.getNode() ||
        LHS.getOperand(3).getNode() != False.getNode() ||
        RHS.getNode() != False.getNode()) {
      return SDValue();
    }

    switch (NCC) {
    default: return SDValue();
    case ISD::SETNE: return LHS;
    case ISD::SETEQ: {
      ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
      LHSCC = ISD::getSetCCInverse(LHSCC,
                                   LHS.getOperand(0).getValueType().isInteger());
      // Only invert the condition if the inverted code is legal (or we are
      // still before operation legalization).
      if (DCI.isBeforeLegalizeOps() ||
          isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
        return DAG.getSelectCC(SDLoc(N),
                               LHS.getOperand(0),
                               LHS.getOperand(1),
                               LHS.getOperand(2),
                               LHS.getOperand(3),
                               LHSCC);
      break;
    }
    }
    return SDValue();
  }

  case AMDGPUISD::EXPORT: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    // Rebuild the EXPORT with an optimized swizzle; NewArgs[4..7] are the
    // four swizzle selectors rewritten in place by OptimizeSwizzle.
    SDValue NewArgs[8] = {
      N->getOperand(0), // Chain
      SDValue(),
      N->getOperand(2), // ArrayBase
      N->getOperand(3), // Type
      N->getOperand(4), // SWZ_X
      N->getOperand(5), // SWZ_Y
      N->getOperand(6), // SWZ_Z
      N->getOperand(7) // SWZ_W
    };
    SDLoc DL(N);
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
    return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
  }
  case AMDGPUISD::TEXTURE_FETCH: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    // Same as EXPORT: operands 2..5 hold the swizzle selectors that
    // OptimizeSwizzle rewrites in place.
    SDValue NewArgs[19] = {
      N->getOperand(0),
      N->getOperand(1),
      N->getOperand(2),
      N->getOperand(3),
      N->getOperand(4),
      N->getOperand(5),
      N->getOperand(6),
      N->getOperand(7),
      N->getOperand(8),
      N->getOperand(9),
      N->getOperand(10),
      N->getOperand(11),
      N->getOperand(12),
      N->getOperand(13),
      N->getOperand(14),
      N->getOperand(15),
      N->getOperand(16),
      N->getOperand(17),
      N->getOperand(18),
    };
    SDLoc DL(N);
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
    return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
  }
  }

  return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002063
2064static bool
2065FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002066 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Eric Christopherfc6de422014-08-05 02:39:49 +00002067 const R600InstrInfo *TII =
2068 static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002069 if (!Src.isMachineOpcode())
2070 return false;
2071 switch (Src.getMachineOpcode()) {
2072 case AMDGPU::FNEG_R600:
2073 if (!Neg.getNode())
2074 return false;
2075 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002076 Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002077 return true;
2078 case AMDGPU::FABS_R600:
2079 if (!Abs.getNode())
2080 return false;
2081 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002082 Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002083 return true;
2084 case AMDGPU::CONST_COPY: {
2085 unsigned Opcode = ParentNode->getMachineOpcode();
2086 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2087
2088 if (!Sel.getNode())
2089 return false;
2090
2091 SDValue CstOffset = Src.getOperand(0);
2092 if (ParentNode->getValueType(0).isVector())
2093 return false;
2094
2095 // Gather constants values
2096 int SrcIndices[] = {
2097 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2098 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2099 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
2100 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2101 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2102 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2103 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2104 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2105 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2106 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2107 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
2108 };
2109 std::vector<unsigned> Consts;
Matt Arsenault4d64f962014-05-12 19:23:21 +00002110 for (int OtherSrcIdx : SrcIndices) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002111 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2112 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2113 continue;
2114 if (HasDst) {
2115 OtherSrcIdx--;
2116 OtherSelIdx--;
2117 }
2118 if (RegisterSDNode *Reg =
2119 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2120 if (Reg->getReg() == AMDGPU::ALU_CONST) {
Matt Arsenaultb3ee3882014-05-12 19:26:38 +00002121 ConstantSDNode *Cst
2122 = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002123 Consts.push_back(Cst->getZExtValue());
2124 }
2125 }
2126 }
2127
Matt Arsenault37c12d72014-05-12 20:42:57 +00002128 ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002129 Consts.push_back(Cst->getZExtValue());
2130 if (!TII->fitsConstReadLimitations(Consts)) {
2131 return false;
2132 }
2133
2134 Sel = CstOffset;
2135 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
2136 return true;
2137 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002138 case AMDGPU::MOV_IMM_I32:
2139 case AMDGPU::MOV_IMM_F32: {
2140 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
2141 uint64_t ImmValue = 0;
2142
2143
2144 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
2145 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
2146 float FloatValue = FPC->getValueAPF().convertToFloat();
2147 if (FloatValue == 0.0) {
2148 ImmReg = AMDGPU::ZERO;
2149 } else if (FloatValue == 0.5) {
2150 ImmReg = AMDGPU::HALF;
2151 } else if (FloatValue == 1.0) {
2152 ImmReg = AMDGPU::ONE;
2153 } else {
2154 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2155 }
2156 } else {
2157 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
2158 uint64_t Value = C->getZExtValue();
2159 if (Value == 0) {
2160 ImmReg = AMDGPU::ZERO;
2161 } else if (Value == 1) {
2162 ImmReg = AMDGPU::ONE_INT;
2163 } else {
2164 ImmValue = Value;
2165 }
2166 }
2167
2168 // Check that we aren't already using an immediate.
2169 // XXX: It's possible for an instruction to have more than one
2170 // immediate operand, but this is not supported yet.
2171 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
2172 if (!Imm.getNode())
2173 return false;
2174 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
2175 assert(C);
2176 if (C->getZExtValue())
2177 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002178 Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002179 }
2180 Src = DAG.getRegister(ImmReg, MVT::i32);
2181 return true;
2182 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002183 default:
2184 return false;
2185 }
2186}
2187
2188
/// \brief Fold the instructions after selecting them
///
/// Walks the operands of an already-selected machine node and, via
/// FoldOperand(), tries to fold source modifiers (neg/abs), constant-buffer
/// reads and immediates directly into the node's operand slots.  Also folds
/// the CLAMP_R600 pseudo into the clamp bit of the instruction feeding it.
/// Returns a freshly built machine node on the first successful fold,
/// otherwise returns \p Node unchanged.
SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
                                            SelectionDAG &DAG) const {
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
  if (!Node->isMachineOpcode())
    return Node;
  unsigned Opcode = Node->getMachineOpcode();
  // Null SDValue used for modifier slots the instruction does not have;
  // FoldOperand refuses folds that would need to write into it.
  SDValue FakeOp;

  std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());

  if (Opcode == AMDGPU::DOT_4) {
    // DOT_4 has eight scalar sources (src0/src1 x X,Y,Z,W), each with its
    // own neg/abs/sel modifier operand; try to fold into each in turn.
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
    };
    for (unsigned i = 0; i < 8; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      // TII operand indices count the MI-level dst operand; the SDNode's
      // operand list does not include it, hence the "- 1" (and the
      // SelIdx-- below when a dst is present).
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue &Abs = Ops[AbsIdx[i] - 1];
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      if (HasDst)
        SelIdx--;
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      // DOT_4 has no literal slot, so pass FakeOp as the Imm operand.
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::REG_SEQUENCE) {
    // REG_SEQUENCE operands alternate (value, subreg-index) after the
    // leading operand, so step by 2 over the value operands only.
    for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
      SDValue &Src = Ops[i];
      if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::CLAMP_R600) {
    // Fold the CLAMP_R600 pseudo away by setting the clamp bit on the
    // instruction that computes its input, when that instruction has one.
    SDValue Src = Node->getOperand(0);
    if (!Src.isMachineOpcode() ||
        !TII->hasInstrModifiers(Src.getMachineOpcode()))
      return Node;
    int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
        AMDGPU::OpName::clamp);
    if (ClampIdx < 0)
      return Node;
    SDLoc DL(Node);
    // NOTE: intentionally shadows the outer Ops — these are the operands of
    // the *input* instruction, which is re-emitted with clamp = 1.
    std::vector<SDValue> Ops(Src->op_begin(), Src->op_end());
    Ops[ClampIdx - 1] = DAG.getTargetConstant(1, DL, MVT::i32);
    return DAG.getMachineNode(Src.getMachineOpcode(), DL,
        Node->getVTList(), Ops);
  } else {
    // Generic ALU instruction with up to three sources; src2 has no abs
    // modifier (AbsIdx[2] == -1 below).
    if (!TII->hasInstrModifiers(Opcode))
      return Node;
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
      -1
    };
    for (unsigned i = 0; i < 3; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      // As above: "- 1" / the decrements compensate for the MI-level dst
      // operand that the SDNode operand list omits.
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue FakeAbs;
      SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
      if (HasDst) {
        SelIdx--;
        ImmIdx--;
      }
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      SDValue &Imm = Ops[ImmIdx];
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  }

  return Node;
}