blob: b6b7067f7e1d462ab95409a6dc261b425a840f2c [file] [log] [blame]
Tom Stellard75aadc22012-12-11 21:25:42 +00001//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Custom DAG lowering for R600
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000016#include "AMDGPUFrameLowering.h"
Matt Arsenaultc791f392014-06-23 18:00:31 +000017#include "AMDGPUIntrinsicInfo.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000018#include "AMDGPUSubtarget.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000019#include "R600Defines.h"
20#include "R600InstrInfo.h"
21#include "R600MachineFunctionInfo.h"
Tom Stellard067c8152014-07-21 14:01:14 +000022#include "llvm/Analysis/ValueTracking.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000023#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000024#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000025#include "llvm/CodeGen/MachineInstrBuilder.h"
26#include "llvm/CodeGen/MachineRegisterInfo.h"
27#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000028#include "llvm/IR/Argument.h"
29#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000030
31using namespace llvm;
32
Eric Christopher7792e322015-01-30 23:24:40 +000033R600TargetLowering::R600TargetLowering(TargetMachine &TM,
34 const AMDGPUSubtarget &STI)
35 : AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000036 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
37 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
38 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
39 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000040 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
41 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
42
Eric Christopher23a3a7c2015-02-26 00:00:24 +000043 computeRegisterProperties(STI.getRegisterInfo());
Tom Stellard75aadc22012-12-11 21:25:42 +000044
Tom Stellard0351ea22013-09-28 02:50:50 +000045 // Set condition code actions
46 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
47 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000048 setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000049 setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000050 setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
51 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000052 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
53 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
54 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
55 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000056 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
57 setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
58
59 setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
60 setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
61 setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
62 setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
63
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000064 setOperationAction(ISD::FCOS, MVT::f32, Custom);
65 setOperationAction(ISD::FSIN, MVT::f32, Custom);
66
Tom Stellard75aadc22012-12-11 21:25:42 +000067 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000068 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000069
Tom Stellard492ebea2013-03-08 15:37:07 +000070 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
71 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Matt Arsenault1d555c42014-06-23 18:00:55 +000072 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000073
74 setOperationAction(ISD::FSUB, MVT::f32, Expand);
75
76 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
77 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
78 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000079
Tom Stellard75aadc22012-12-11 21:25:42 +000080 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
81 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
82
Tom Stellarde8f9f282013-03-08 15:37:05 +000083 setOperationAction(ISD::SETCC, MVT::i32, Expand);
84 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000085 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
Jan Vesely2cb62ce2014-07-10 22:40:21 +000086 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
87 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000088
Tom Stellard53f2f902013-09-05 18:38:03 +000089 setOperationAction(ISD::SELECT, MVT::i32, Expand);
90 setOperationAction(ISD::SELECT, MVT::f32, Expand);
91 setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
Tom Stellard53f2f902013-09-05 18:38:03 +000092 setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000093
Matt Arsenault4e466652014-04-16 01:41:30 +000094 // Expand sign extension of vectors
95 if (!Subtarget->hasBFE())
96 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
97
98 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
99 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);
100
101 if (!Subtarget->hasBFE())
102 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
103 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
104 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);
105
106 if (!Subtarget->hasBFE())
107 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
108 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
109 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);
110
111 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
112 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
113 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);
114
115 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
116
117
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000118 // Legalize loads and stores to the private address space.
119 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000120 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000121 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000122
123 // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
124 // spaces, so it is custom lowered to handle those where it isn't.
Ahmed Bougacha2b6917b2015-01-08 00:51:32 +0000125 for (MVT VT : MVT::integer_valuetypes()) {
126 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
127 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
128 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);
Matt Arsenault2a495972014-11-23 02:57:54 +0000129
Ahmed Bougacha2b6917b2015-01-08 00:51:32 +0000130 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
131 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
132 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);
Matt Arsenault2a495972014-11-23 02:57:54 +0000133
Ahmed Bougacha2b6917b2015-01-08 00:51:32 +0000134 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
135 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
136 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
137 }
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000138
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000139 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000140 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000141 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000142 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
Tom Stellardd3ee8c12013-08-16 01:12:06 +0000143 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
144 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000145
Tom Stellard365366f2013-01-23 02:09:06 +0000146 setOperationAction(ISD::LOAD, MVT::i32, Custom);
147 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000148 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
149
Tom Stellard880a80a2014-06-17 16:53:14 +0000150 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
151 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
152 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
153 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
154
155 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
156 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
157 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
158 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
159
Tom Stellard75aadc22012-12-11 21:25:42 +0000160 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +0000161 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +0000162 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +0000163 setTargetDAGCombine(ISD::SELECT_CC);
Quentin Colombete2e05482013-07-30 00:27:16 +0000164 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000165
Jan Veselyffcd9682015-04-13 17:47:15 +0000166 setOperationAction(ISD::SUB, MVT::i64, Expand);
167
Tom Stellard5f337882014-04-29 23:12:43 +0000168 // These should be replaced by UDVIREM, but it does not happen automatically
169 // during Type Legalization
170 setOperationAction(ISD::UDIV, MVT::i64, Custom);
171 setOperationAction(ISD::UREM, MVT::i64, Custom);
Jan Vesely343cd6f02014-06-22 21:43:01 +0000172 setOperationAction(ISD::SDIV, MVT::i64, Custom);
173 setOperationAction(ISD::SREM, MVT::i64, Custom);
Tom Stellard5f337882014-04-29 23:12:43 +0000174
Jan Vesely25f36272014-06-18 12:27:13 +0000175 // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
176 // to be Legal/Custom in order to avoid library calls.
177 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
Jan Vesely900ff2e2014-06-18 12:27:15 +0000178 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
Jan Veselyecf51332014-06-18 12:27:17 +0000179 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
Jan Vesely25f36272014-06-18 12:27:13 +0000180
Michel Danzer49812b52013-07-10 16:37:07 +0000181 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
182
Matt Arsenaultc4d3d3a2014-06-23 18:00:49 +0000183 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
184 for (MVT VT : ScalarIntVTs) {
185 setOperationAction(ISD::ADDC, VT, Expand);
186 setOperationAction(ISD::SUBC, VT, Expand);
187 setOperationAction(ISD::ADDE, VT, Expand);
188 setOperationAction(ISD::SUBE, VT, Expand);
189 }
190
Tom Stellardfc455472013-08-12 22:33:21 +0000191 setSchedulingPreference(Sched::Source);
Tom Stellard75aadc22012-12-11 21:25:42 +0000192}
193
194MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
195 MachineInstr * MI, MachineBasicBlock * BB) const {
196 MachineFunction * MF = BB->getParent();
197 MachineRegisterInfo &MRI = MF->getRegInfo();
198 MachineBasicBlock::iterator I = *MI;
Eric Christopherfc6de422014-08-05 02:39:49 +0000199 const R600InstrInfo *TII =
Eric Christopher7792e322015-01-30 23:24:40 +0000200 static_cast<const R600InstrInfo *>(Subtarget->getInstrInfo());
Tom Stellard75aadc22012-12-11 21:25:42 +0000201
202 switch (MI->getOpcode()) {
Tom Stellardc6f4a292013-08-26 15:05:59 +0000203 default:
Tom Stellard8f9fc202013-11-15 00:12:45 +0000204 // Replace LDS_*_RET instruction that don't have any uses with the
205 // equivalent LDS_*_NORET instruction.
206 if (TII->isLDSRetInstr(MI->getOpcode())) {
Tom Stellard13c68ef2013-09-05 18:38:09 +0000207 int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
208 assert(DstIdx != -1);
209 MachineInstrBuilder NewMI;
Aaron Watry1885e532014-09-11 15:02:54 +0000210 // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
211 // LDS_1A2D support and remove this special case.
212 if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()) ||
213 MI->getOpcode() == AMDGPU::LDS_CMPST_RET)
Tom Stellard8f9fc202013-11-15 00:12:45 +0000214 return BB;
215
216 NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
217 TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
Tom Stellardc6f4a292013-08-26 15:05:59 +0000218 for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
219 NewMI.addOperand(MI->getOperand(i));
220 }
Tom Stellardc6f4a292013-08-26 15:05:59 +0000221 } else {
222 return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
223 }
224 break;
Tom Stellard75aadc22012-12-11 21:25:42 +0000225 case AMDGPU::CLAMP_R600: {
226 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
227 AMDGPU::MOV,
228 MI->getOperand(0).getReg(),
229 MI->getOperand(1).getReg());
230 TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
231 break;
232 }
233
234 case AMDGPU::FABS_R600: {
235 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
236 AMDGPU::MOV,
237 MI->getOperand(0).getReg(),
238 MI->getOperand(1).getReg());
239 TII->addFlag(NewMI, 0, MO_FLAG_ABS);
240 break;
241 }
242
243 case AMDGPU::FNEG_R600: {
244 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
245 AMDGPU::MOV,
246 MI->getOperand(0).getReg(),
247 MI->getOperand(1).getReg());
248 TII->addFlag(NewMI, 0, MO_FLAG_NEG);
249 break;
250 }
251
Tom Stellard75aadc22012-12-11 21:25:42 +0000252 case AMDGPU::MASK_WRITE: {
253 unsigned maskedRegister = MI->getOperand(0).getReg();
254 assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
255 MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
256 TII->addFlag(defInstr, 0, MO_FLAG_MASK);
257 break;
258 }
259
260 case AMDGPU::MOV_IMM_F32:
261 TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
262 MI->getOperand(1).getFPImm()->getValueAPF()
263 .bitcastToAPInt().getZExtValue());
264 break;
265 case AMDGPU::MOV_IMM_I32:
266 TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
267 MI->getOperand(1).getImm());
268 break;
Vincent Lejeune0b72f102013-03-05 15:04:55 +0000269 case AMDGPU::CONST_COPY: {
270 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
271 MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
Tom Stellard02661d92013-06-25 21:22:18 +0000272 TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
Vincent Lejeune0b72f102013-03-05 15:04:55 +0000273 MI->getOperand(1).getImm());
274 break;
275 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000276
277 case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
Tom Stellard0344cdf2013-08-01 15:23:42 +0000278 case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
Tom Stellard75aadc22012-12-11 21:25:42 +0000279 case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +0000280 unsigned EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
Tom Stellard75aadc22012-12-11 21:25:42 +0000281
282 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
283 .addOperand(MI->getOperand(0))
284 .addOperand(MI->getOperand(1))
285 .addImm(EOP); // Set End of program bit
286 break;
287 }
288
Tom Stellard75aadc22012-12-11 21:25:42 +0000289 case AMDGPU::TXD: {
290 unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
291 unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000292 MachineOperand &RID = MI->getOperand(4);
293 MachineOperand &SID = MI->getOperand(5);
294 unsigned TextureId = MI->getOperand(6).getImm();
295 unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
296 unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
Tom Stellard75aadc22012-12-11 21:25:42 +0000297
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000298 switch (TextureId) {
299 case 5: // Rect
300 CTX = CTY = 0;
301 break;
302 case 6: // Shadow1D
303 SrcW = SrcZ;
304 break;
305 case 7: // Shadow2D
306 SrcW = SrcZ;
307 break;
308 case 8: // ShadowRect
309 CTX = CTY = 0;
310 SrcW = SrcZ;
311 break;
312 case 9: // 1DArray
313 SrcZ = SrcY;
314 CTZ = 0;
315 break;
316 case 10: // 2DArray
317 CTZ = 0;
318 break;
319 case 11: // Shadow1DArray
320 SrcZ = SrcY;
321 CTZ = 0;
322 break;
323 case 12: // Shadow2DArray
324 CTZ = 0;
325 break;
326 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000327 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
328 .addOperand(MI->getOperand(3))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000329 .addImm(SrcX)
330 .addImm(SrcY)
331 .addImm(SrcZ)
332 .addImm(SrcW)
333 .addImm(0)
334 .addImm(0)
335 .addImm(0)
336 .addImm(0)
337 .addImm(1)
338 .addImm(2)
339 .addImm(3)
340 .addOperand(RID)
341 .addOperand(SID)
342 .addImm(CTX)
343 .addImm(CTY)
344 .addImm(CTZ)
345 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000346 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
347 .addOperand(MI->getOperand(2))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000348 .addImm(SrcX)
349 .addImm(SrcY)
350 .addImm(SrcZ)
351 .addImm(SrcW)
352 .addImm(0)
353 .addImm(0)
354 .addImm(0)
355 .addImm(0)
356 .addImm(1)
357 .addImm(2)
358 .addImm(3)
359 .addOperand(RID)
360 .addOperand(SID)
361 .addImm(CTX)
362 .addImm(CTY)
363 .addImm(CTZ)
364 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000365 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
366 .addOperand(MI->getOperand(0))
367 .addOperand(MI->getOperand(1))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000368 .addImm(SrcX)
369 .addImm(SrcY)
370 .addImm(SrcZ)
371 .addImm(SrcW)
372 .addImm(0)
373 .addImm(0)
374 .addImm(0)
375 .addImm(0)
376 .addImm(1)
377 .addImm(2)
378 .addImm(3)
379 .addOperand(RID)
380 .addOperand(SID)
381 .addImm(CTX)
382 .addImm(CTY)
383 .addImm(CTZ)
384 .addImm(CTW)
Tom Stellard75aadc22012-12-11 21:25:42 +0000385 .addReg(T0, RegState::Implicit)
386 .addReg(T1, RegState::Implicit);
387 break;
388 }
389
390 case AMDGPU::TXD_SHADOW: {
391 unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
392 unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000393 MachineOperand &RID = MI->getOperand(4);
394 MachineOperand &SID = MI->getOperand(5);
395 unsigned TextureId = MI->getOperand(6).getImm();
396 unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
397 unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
398
399 switch (TextureId) {
400 case 5: // Rect
401 CTX = CTY = 0;
402 break;
403 case 6: // Shadow1D
404 SrcW = SrcZ;
405 break;
406 case 7: // Shadow2D
407 SrcW = SrcZ;
408 break;
409 case 8: // ShadowRect
410 CTX = CTY = 0;
411 SrcW = SrcZ;
412 break;
413 case 9: // 1DArray
414 SrcZ = SrcY;
415 CTZ = 0;
416 break;
417 case 10: // 2DArray
418 CTZ = 0;
419 break;
420 case 11: // Shadow1DArray
421 SrcZ = SrcY;
422 CTZ = 0;
423 break;
424 case 12: // Shadow2DArray
425 CTZ = 0;
426 break;
427 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000428
429 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
430 .addOperand(MI->getOperand(3))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000431 .addImm(SrcX)
432 .addImm(SrcY)
433 .addImm(SrcZ)
434 .addImm(SrcW)
435 .addImm(0)
436 .addImm(0)
437 .addImm(0)
438 .addImm(0)
439 .addImm(1)
440 .addImm(2)
441 .addImm(3)
442 .addOperand(RID)
443 .addOperand(SID)
444 .addImm(CTX)
445 .addImm(CTY)
446 .addImm(CTZ)
447 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000448 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
449 .addOperand(MI->getOperand(2))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000450 .addImm(SrcX)
451 .addImm(SrcY)
452 .addImm(SrcZ)
453 .addImm(SrcW)
454 .addImm(0)
455 .addImm(0)
456 .addImm(0)
457 .addImm(0)
458 .addImm(1)
459 .addImm(2)
460 .addImm(3)
461 .addOperand(RID)
462 .addOperand(SID)
463 .addImm(CTX)
464 .addImm(CTY)
465 .addImm(CTZ)
466 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000467 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
468 .addOperand(MI->getOperand(0))
469 .addOperand(MI->getOperand(1))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000470 .addImm(SrcX)
471 .addImm(SrcY)
472 .addImm(SrcZ)
473 .addImm(SrcW)
474 .addImm(0)
475 .addImm(0)
476 .addImm(0)
477 .addImm(0)
478 .addImm(1)
479 .addImm(2)
480 .addImm(3)
481 .addOperand(RID)
482 .addOperand(SID)
483 .addImm(CTX)
484 .addImm(CTY)
485 .addImm(CTZ)
486 .addImm(CTW)
Tom Stellard75aadc22012-12-11 21:25:42 +0000487 .addReg(T0, RegState::Implicit)
488 .addReg(T1, RegState::Implicit);
489 break;
490 }
491
492 case AMDGPU::BRANCH:
493 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000494 .addOperand(MI->getOperand(0));
Tom Stellard75aadc22012-12-11 21:25:42 +0000495 break;
496
497 case AMDGPU::BRANCH_COND_f32: {
498 MachineInstr *NewMI =
499 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
500 AMDGPU::PREDICATE_BIT)
501 .addOperand(MI->getOperand(1))
502 .addImm(OPCODE_IS_NOT_ZERO)
503 .addImm(0); // Flags
504 TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000505 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
Tom Stellard75aadc22012-12-11 21:25:42 +0000506 .addOperand(MI->getOperand(0))
507 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
508 break;
509 }
510
511 case AMDGPU::BRANCH_COND_i32: {
512 MachineInstr *NewMI =
513 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
514 AMDGPU::PREDICATE_BIT)
515 .addOperand(MI->getOperand(1))
516 .addImm(OPCODE_IS_NOT_ZERO_INT)
517 .addImm(0); // Flags
518 TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000519 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
Tom Stellard75aadc22012-12-11 21:25:42 +0000520 .addOperand(MI->getOperand(0))
521 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
522 break;
523 }
524
Tom Stellard75aadc22012-12-11 21:25:42 +0000525 case AMDGPU::EG_ExportSwz:
526 case AMDGPU::R600_ExportSwz: {
Tom Stellard6f1b8652013-01-23 21:39:49 +0000527 // Instruction is left unmodified if its not the last one of its type
528 bool isLastInstructionOfItsType = true;
529 unsigned InstExportType = MI->getOperand(1).getImm();
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +0000530 for (MachineBasicBlock::iterator NextExportInst = std::next(I),
Tom Stellard6f1b8652013-01-23 21:39:49 +0000531 EndBlock = BB->end(); NextExportInst != EndBlock;
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +0000532 NextExportInst = std::next(NextExportInst)) {
Tom Stellard6f1b8652013-01-23 21:39:49 +0000533 if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
534 NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
535 unsigned CurrentInstExportType = NextExportInst->getOperand(1)
536 .getImm();
537 if (CurrentInstExportType == InstExportType) {
538 isLastInstructionOfItsType = false;
539 break;
540 }
541 }
542 }
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +0000543 bool EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
Tom Stellard6f1b8652013-01-23 21:39:49 +0000544 if (!EOP && !isLastInstructionOfItsType)
Tom Stellard75aadc22012-12-11 21:25:42 +0000545 return BB;
546 unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
547 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
548 .addOperand(MI->getOperand(0))
549 .addOperand(MI->getOperand(1))
550 .addOperand(MI->getOperand(2))
551 .addOperand(MI->getOperand(3))
552 .addOperand(MI->getOperand(4))
553 .addOperand(MI->getOperand(5))
554 .addOperand(MI->getOperand(6))
555 .addImm(CfInst)
Tom Stellard6f1b8652013-01-23 21:39:49 +0000556 .addImm(EOP);
Tom Stellard75aadc22012-12-11 21:25:42 +0000557 break;
558 }
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000559 case AMDGPU::RETURN: {
560 // RETURN instructions must have the live-out registers as implicit uses,
561 // otherwise they appear dead.
562 R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
563 MachineInstrBuilder MIB(*MF, MI);
564 for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
565 MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
566 return BB;
567 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000568 }
569
570 MI->eraseFromParent();
571 return BB;
572}
573
574//===----------------------------------------------------------------------===//
575// Custom DAG Lowering Operations
576//===----------------------------------------------------------------------===//
577
Tom Stellard75aadc22012-12-11 21:25:42 +0000578SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000579 MachineFunction &MF = DAG.getMachineFunction();
580 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard75aadc22012-12-11 21:25:42 +0000581 switch (Op.getOpcode()) {
582 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Tom Stellard880a80a2014-06-17 16:53:14 +0000583 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
584 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
Jan Vesely25f36272014-06-18 12:27:13 +0000585 case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
Jan Veselyecf51332014-06-18 12:27:17 +0000586 case ISD::SRA_PARTS:
Jan Vesely900ff2e2014-06-18 12:27:15 +0000587 case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000588 case ISD::FCOS:
589 case ISD::FSIN: return LowerTrig(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000590 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000591 case ISD::STORE: return LowerSTORE(Op, DAG);
Matt Arsenaultd2c9e082014-07-07 18:34:45 +0000592 case ISD::LOAD: {
593 SDValue Result = LowerLOAD(Op, DAG);
594 assert((!Result.getNode() ||
595 Result.getNode()->getNumValues() == 2) &&
596 "Load should return a value and a chain");
597 return Result;
598 }
599
Matt Arsenault1d555c42014-06-23 18:00:55 +0000600 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000601 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000602 case ISD::INTRINSIC_VOID: {
603 SDValue Chain = Op.getOperand(0);
604 unsigned IntrinsicID =
605 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
606 switch (IntrinsicID) {
607 case AMDGPUIntrinsic::AMDGPU_store_output: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000608 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
609 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000610 MFI->LiveOuts.push_back(Reg);
Andrew Trickef9de2a2013-05-25 02:42:55 +0000611 return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
Tom Stellard75aadc22012-12-11 21:25:42 +0000612 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000613 case AMDGPUIntrinsic::R600_store_swizzle: {
614 const SDValue Args[8] = {
615 Chain,
616 Op.getOperand(2), // Export Value
617 Op.getOperand(3), // ArrayBase
618 Op.getOperand(4), // Type
Daniel Jasper48e93f72015-04-28 13:38:35 +0000619 DAG.getConstant(0, MVT::i32), // SWZ_X
620 DAG.getConstant(1, MVT::i32), // SWZ_Y
621 DAG.getConstant(2, MVT::i32), // SWZ_Z
622 DAG.getConstant(3, MVT::i32) // SWZ_W
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000623 };
Daniel Jasper48e93f72015-04-28 13:38:35 +0000624 return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(), Args);
Tom Stellard75aadc22012-12-11 21:25:42 +0000625 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000626
Tom Stellard75aadc22012-12-11 21:25:42 +0000627 // default for switch(IntrinsicID)
628 default: break;
629 }
630 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
631 break;
632 }
633 case ISD::INTRINSIC_WO_CHAIN: {
634 unsigned IntrinsicID =
635 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
636 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000637 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000638 switch(IntrinsicID) {
639 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000640 case AMDGPUIntrinsic::R600_load_input: {
641 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
642 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
643 MachineFunction &MF = DAG.getMachineFunction();
644 MachineRegisterInfo &MRI = MF.getRegInfo();
645 MRI.addLiveIn(Reg);
646 return DAG.getCopyFromReg(DAG.getEntryNode(),
647 SDLoc(DAG.getEntryNode()), Reg, VT);
648 }
649
650 case AMDGPUIntrinsic::R600_interp_input: {
651 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
652 int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
653 MachineSDNode *interp;
654 if (ijb < 0) {
Eric Christopher7792e322015-01-30 23:24:40 +0000655 const R600InstrInfo *TII =
656 static_cast<const R600InstrInfo *>(Subtarget->getInstrInfo());
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000657 interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
Daniel Jasper48e93f72015-04-28 13:38:35 +0000658 MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000659 return DAG.getTargetExtractSubreg(
660 TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
661 DL, MVT::f32, SDValue(interp, 0));
662 }
663 MachineFunction &MF = DAG.getMachineFunction();
664 MachineRegisterInfo &MRI = MF.getRegInfo();
665 unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
666 unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
667 MRI.addLiveIn(RegisterI);
668 MRI.addLiveIn(RegisterJ);
669 SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
670 SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
671 SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
672 SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);
673
674 if (slot % 4 < 2)
675 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
Daniel Jasper48e93f72015-04-28 13:38:35 +0000676 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000677 RegisterJNode, RegisterINode);
678 else
679 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
Daniel Jasper48e93f72015-04-28 13:38:35 +0000680 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000681 RegisterJNode, RegisterINode);
682 return SDValue(interp, slot % 2);
683 }
Vincent Lejeunef143af32013-11-11 22:10:24 +0000684 case AMDGPUIntrinsic::R600_interp_xy:
685 case AMDGPUIntrinsic::R600_interp_zw: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000686 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Tom Stellard41afe6a2013-02-05 17:09:14 +0000687 MachineSDNode *interp;
Vincent Lejeunef143af32013-11-11 22:10:24 +0000688 SDValue RegisterINode = Op.getOperand(2);
689 SDValue RegisterJNode = Op.getOperand(3);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000690
Vincent Lejeunef143af32013-11-11 22:10:24 +0000691 if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
Tom Stellard41afe6a2013-02-05 17:09:14 +0000692 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
Daniel Jasper48e93f72015-04-28 13:38:35 +0000693 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000694 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000695 else
696 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
Daniel Jasper48e93f72015-04-28 13:38:35 +0000697 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000698 RegisterJNode, RegisterINode);
Vincent Lejeunef143af32013-11-11 22:10:24 +0000699 return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
700 SDValue(interp, 0), SDValue(interp, 1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000701 }
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000702 case AMDGPUIntrinsic::R600_tex:
703 case AMDGPUIntrinsic::R600_texc:
704 case AMDGPUIntrinsic::R600_txl:
705 case AMDGPUIntrinsic::R600_txlc:
706 case AMDGPUIntrinsic::R600_txb:
707 case AMDGPUIntrinsic::R600_txbc:
708 case AMDGPUIntrinsic::R600_txf:
709 case AMDGPUIntrinsic::R600_txq:
710 case AMDGPUIntrinsic::R600_ddx:
Vincent Lejeune6df39432013-10-02 16:00:33 +0000711 case AMDGPUIntrinsic::R600_ddy:
712 case AMDGPUIntrinsic::R600_ldptr: {
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000713 unsigned TextureOp;
714 switch (IntrinsicID) {
715 case AMDGPUIntrinsic::R600_tex:
716 TextureOp = 0;
717 break;
718 case AMDGPUIntrinsic::R600_texc:
719 TextureOp = 1;
720 break;
721 case AMDGPUIntrinsic::R600_txl:
722 TextureOp = 2;
723 break;
724 case AMDGPUIntrinsic::R600_txlc:
725 TextureOp = 3;
726 break;
727 case AMDGPUIntrinsic::R600_txb:
728 TextureOp = 4;
729 break;
730 case AMDGPUIntrinsic::R600_txbc:
731 TextureOp = 5;
732 break;
733 case AMDGPUIntrinsic::R600_txf:
734 TextureOp = 6;
735 break;
736 case AMDGPUIntrinsic::R600_txq:
737 TextureOp = 7;
738 break;
739 case AMDGPUIntrinsic::R600_ddx:
740 TextureOp = 8;
741 break;
742 case AMDGPUIntrinsic::R600_ddy:
743 TextureOp = 9;
744 break;
Vincent Lejeune6df39432013-10-02 16:00:33 +0000745 case AMDGPUIntrinsic::R600_ldptr:
746 TextureOp = 10;
747 break;
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000748 default:
749 llvm_unreachable("Unknow Texture Operation");
750 }
751
752 SDValue TexArgs[19] = {
Daniel Jasper48e93f72015-04-28 13:38:35 +0000753 DAG.getConstant(TextureOp, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000754 Op.getOperand(1),
Daniel Jasper48e93f72015-04-28 13:38:35 +0000755 DAG.getConstant(0, MVT::i32),
756 DAG.getConstant(1, MVT::i32),
757 DAG.getConstant(2, MVT::i32),
758 DAG.getConstant(3, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000759 Op.getOperand(2),
760 Op.getOperand(3),
761 Op.getOperand(4),
Daniel Jasper48e93f72015-04-28 13:38:35 +0000762 DAG.getConstant(0, MVT::i32),
763 DAG.getConstant(1, MVT::i32),
764 DAG.getConstant(2, MVT::i32),
765 DAG.getConstant(3, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000766 Op.getOperand(5),
767 Op.getOperand(6),
768 Op.getOperand(7),
769 Op.getOperand(8),
770 Op.getOperand(9),
771 Op.getOperand(10)
772 };
Craig Topper48d114b2014-04-26 18:35:24 +0000773 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000774 }
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000775 case AMDGPUIntrinsic::AMDGPU_dp4: {
776 SDValue Args[8] = {
777 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Daniel Jasper48e93f72015-04-28 13:38:35 +0000778 DAG.getConstant(0, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000779 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Daniel Jasper48e93f72015-04-28 13:38:35 +0000780 DAG.getConstant(0, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000781 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Daniel Jasper48e93f72015-04-28 13:38:35 +0000782 DAG.getConstant(1, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000783 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Daniel Jasper48e93f72015-04-28 13:38:35 +0000784 DAG.getConstant(1, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000785 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Daniel Jasper48e93f72015-04-28 13:38:35 +0000786 DAG.getConstant(2, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000787 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Daniel Jasper48e93f72015-04-28 13:38:35 +0000788 DAG.getConstant(2, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000789 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Daniel Jasper48e93f72015-04-28 13:38:35 +0000790 DAG.getConstant(3, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000791 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Daniel Jasper48e93f72015-04-28 13:38:35 +0000792 DAG.getConstant(3, MVT::i32))
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000793 };
Craig Topper48d114b2014-04-26 18:35:24 +0000794 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000795 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000796
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000797 case Intrinsic::r600_read_ngroups_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000798 return LowerImplicitParameter(DAG, VT, DL, 0);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000799 case Intrinsic::r600_read_ngroups_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000800 return LowerImplicitParameter(DAG, VT, DL, 1);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000801 case Intrinsic::r600_read_ngroups_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000802 return LowerImplicitParameter(DAG, VT, DL, 2);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000803 case Intrinsic::r600_read_global_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000804 return LowerImplicitParameter(DAG, VT, DL, 3);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000805 case Intrinsic::r600_read_global_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000806 return LowerImplicitParameter(DAG, VT, DL, 4);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000807 case Intrinsic::r600_read_global_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000808 return LowerImplicitParameter(DAG, VT, DL, 5);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000809 case Intrinsic::r600_read_local_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000810 return LowerImplicitParameter(DAG, VT, DL, 6);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000811 case Intrinsic::r600_read_local_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000812 return LowerImplicitParameter(DAG, VT, DL, 7);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000813 case Intrinsic::r600_read_local_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000814 return LowerImplicitParameter(DAG, VT, DL, 8);
815
Jan Veselye5121f32014-10-14 20:05:26 +0000816 case Intrinsic::AMDGPU_read_workdim:
817 return LowerImplicitParameter(DAG, VT, DL, MFI->ABIArgOffset / 4);
818
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000819 case Intrinsic::r600_read_tgid_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000820 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
821 AMDGPU::T1_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000822 case Intrinsic::r600_read_tgid_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000823 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
824 AMDGPU::T1_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000825 case Intrinsic::r600_read_tgid_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000826 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
827 AMDGPU::T1_Z, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000828 case Intrinsic::r600_read_tidig_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000829 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
830 AMDGPU::T0_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000831 case Intrinsic::r600_read_tidig_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000832 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
833 AMDGPU::T0_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000834 case Intrinsic::r600_read_tidig_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000835 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
836 AMDGPU::T0_Z, VT);
Matt Arsenault257d48d2014-06-24 22:13:39 +0000837 case Intrinsic::AMDGPU_rsq:
838 // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
839 return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
Marek Olsak43650e42015-03-24 13:40:08 +0000840
841 case AMDGPUIntrinsic::AMDGPU_fract:
842 case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
843 return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000844 }
845 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
846 break;
847 }
848 } // end switch(Op.getOpcode())
849 return SDValue();
850}
851
852void R600TargetLowering::ReplaceNodeResults(SDNode *N,
853 SmallVectorImpl<SDValue> &Results,
854 SelectionDAG &DAG) const {
855 switch (N->getOpcode()) {
Matt Arsenaultd125d742014-03-27 17:23:24 +0000856 default:
857 AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
858 return;
Jan Vesely2cb62ce2014-07-10 22:40:21 +0000859 case ISD::FP_TO_UINT:
860 if (N->getValueType(0) == MVT::i1) {
861 Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
862 return;
863 }
864 // Fall-through. Since we don't care about out of bounds values
865 // we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint
866 // considers some extra cases which are not necessary here.
867 case ISD::FP_TO_SINT: {
868 SDValue Result;
869 if (expandFP_TO_SINT(N, Result, DAG))
870 Results.push_back(Result);
Tom Stellard365366f2013-01-23 02:09:06 +0000871 return;
Jan Vesely2cb62ce2014-07-10 22:40:21 +0000872 }
Jan Vesely343cd6f02014-06-22 21:43:01 +0000873 case ISD::UDIV: {
874 SDValue Op = SDValue(N, 0);
875 SDLoc DL(Op);
876 EVT VT = Op.getValueType();
877 SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT),
878 N->getOperand(0), N->getOperand(1));
879 Results.push_back(UDIVREM);
880 break;
881 }
882 case ISD::UREM: {
883 SDValue Op = SDValue(N, 0);
884 SDLoc DL(Op);
885 EVT VT = Op.getValueType();
886 SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT),
887 N->getOperand(0), N->getOperand(1));
888 Results.push_back(UDIVREM.getValue(1));
889 break;
890 }
891 case ISD::SDIV: {
892 SDValue Op = SDValue(N, 0);
893 SDLoc DL(Op);
894 EVT VT = Op.getValueType();
895 SDValue SDIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(VT, VT),
896 N->getOperand(0), N->getOperand(1));
897 Results.push_back(SDIVREM);
898 break;
899 }
900 case ISD::SREM: {
901 SDValue Op = SDValue(N, 0);
902 SDLoc DL(Op);
903 EVT VT = Op.getValueType();
904 SDValue SDIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(VT, VT),
905 N->getOperand(0), N->getOperand(1));
906 Results.push_back(SDIVREM.getValue(1));
907 break;
908 }
909 case ISD::SDIVREM: {
910 SDValue Op = SDValue(N, 1);
911 SDValue RES = LowerSDIVREM(Op, DAG);
912 Results.push_back(RES);
913 Results.push_back(RES.getValue(1));
914 break;
915 }
916 case ISD::UDIVREM: {
917 SDValue Op = SDValue(N, 0);
Tom Stellardbf69d762014-11-15 01:07:53 +0000918 LowerUDIVREM64(Op, DAG, Results);
Jan Vesely343cd6f02014-06-22 21:43:01 +0000919 break;
920 }
921 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000922}
923
Tom Stellard880a80a2014-06-17 16:53:14 +0000924SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
925 SDValue Vector) const {
926
927 SDLoc DL(Vector);
928 EVT VecVT = Vector.getValueType();
929 EVT EltVT = VecVT.getVectorElementType();
930 SmallVector<SDValue, 8> Args;
931
932 for (unsigned i = 0, e = VecVT.getVectorNumElements();
933 i != e; ++i) {
Daniel Jasper48e93f72015-04-28 13:38:35 +0000934 Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
935 Vector, DAG.getConstant(i, getVectorIdxTy())));
Tom Stellard880a80a2014-06-17 16:53:14 +0000936 }
937
938 return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
939}
940
941SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
942 SelectionDAG &DAG) const {
943
944 SDLoc DL(Op);
945 SDValue Vector = Op.getOperand(0);
946 SDValue Index = Op.getOperand(1);
947
948 if (isa<ConstantSDNode>(Index) ||
949 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
950 return Op;
951
952 Vector = vectorToVerticalVector(DAG, Vector);
953 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
954 Vector, Index);
955}
956
957SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
958 SelectionDAG &DAG) const {
959 SDLoc DL(Op);
960 SDValue Vector = Op.getOperand(0);
961 SDValue Value = Op.getOperand(1);
962 SDValue Index = Op.getOperand(2);
963
964 if (isa<ConstantSDNode>(Index) ||
965 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
966 return Op;
967
968 Vector = vectorToVerticalVector(DAG, Vector);
969 SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
970 Vector, Value, Index);
971 return vectorToVerticalVector(DAG, Insert);
972}
973
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000974SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
975 // On hw >= R700, COS/SIN input must be between -1. and 1.
976 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
977 EVT VT = Op.getValueType();
978 SDValue Arg = Op.getOperand(0);
Daniel Jasper48e93f72015-04-28 13:38:35 +0000979 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
980 DAG.getNode(ISD::FADD, SDLoc(Op), VT,
981 DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
982 DAG.getConstantFP(0.15915494309, MVT::f32)),
983 DAG.getConstantFP(0.5, MVT::f32)));
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000984 unsigned TrigNode;
985 switch (Op.getOpcode()) {
986 case ISD::FCOS:
987 TrigNode = AMDGPUISD::COS_HW;
988 break;
989 case ISD::FSIN:
990 TrigNode = AMDGPUISD::SIN_HW;
991 break;
992 default:
993 llvm_unreachable("Wrong trig opcode");
994 }
Daniel Jasper48e93f72015-04-28 13:38:35 +0000995 SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
996 DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
997 DAG.getConstantFP(-0.5, MVT::f32)));
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000998 if (Gen >= AMDGPUSubtarget::R700)
999 return TrigVal;
1000 // On R600 hw, COS/SIN input must be between -Pi and Pi.
Daniel Jasper48e93f72015-04-28 13:38:35 +00001001 return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
1002 DAG.getConstantFP(3.14159265359, MVT::f32));
Vincent Lejeuneb55940c2013-07-09 15:03:11 +00001003}
1004
Jan Vesely25f36272014-06-18 12:27:13 +00001005SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
1006 SDLoc DL(Op);
1007 EVT VT = Op.getValueType();
1008
1009 SDValue Lo = Op.getOperand(0);
1010 SDValue Hi = Op.getOperand(1);
1011 SDValue Shift = Op.getOperand(2);
Daniel Jasper48e93f72015-04-28 13:38:35 +00001012 SDValue Zero = DAG.getConstant(0, VT);
1013 SDValue One = DAG.getConstant(1, VT);
Jan Vesely25f36272014-06-18 12:27:13 +00001014
Daniel Jasper48e93f72015-04-28 13:38:35 +00001015 SDValue Width = DAG.getConstant(VT.getSizeInBits(), VT);
1016 SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT);
Jan Vesely25f36272014-06-18 12:27:13 +00001017 SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
1018 SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
1019
1020 // The dance around Width1 is necessary for 0 special case.
1021 // Without it the CompShift might be 32, producing incorrect results in
1022 // Overflow. So we do the shift in two steps, the alternative is to
1023 // add a conditional to filter the special case.
1024
1025 SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
1026 Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);
1027
1028 SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
1029 HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
1030 SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);
1031
1032 SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
1033 SDValue LoBig = Zero;
1034
1035 Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
1036 Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
1037
1038 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
1039}
1040
Jan Vesely900ff2e2014-06-18 12:27:15 +00001041SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
1042 SDLoc DL(Op);
1043 EVT VT = Op.getValueType();
1044
1045 SDValue Lo = Op.getOperand(0);
1046 SDValue Hi = Op.getOperand(1);
1047 SDValue Shift = Op.getOperand(2);
Daniel Jasper48e93f72015-04-28 13:38:35 +00001048 SDValue Zero = DAG.getConstant(0, VT);
1049 SDValue One = DAG.getConstant(1, VT);
Jan Vesely900ff2e2014-06-18 12:27:15 +00001050
Jan Veselyecf51332014-06-18 12:27:17 +00001051 const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;
1052
Daniel Jasper48e93f72015-04-28 13:38:35 +00001053 SDValue Width = DAG.getConstant(VT.getSizeInBits(), VT);
1054 SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT);
Jan Vesely900ff2e2014-06-18 12:27:15 +00001055 SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
1056 SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
1057
1058 // The dance around Width1 is necessary for 0 special case.
1059 // Without it the CompShift might be 32, producing incorrect results in
1060 // Overflow. So we do the shift in two steps, the alternative is to
1061 // add a conditional to filter the special case.
1062
1063 SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
1064 Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);
1065
Jan Veselyecf51332014-06-18 12:27:17 +00001066 SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
Jan Vesely900ff2e2014-06-18 12:27:15 +00001067 SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
1068 LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);
1069
Jan Veselyecf51332014-06-18 12:27:17 +00001070 SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
1071 SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;
Jan Vesely900ff2e2014-06-18 12:27:15 +00001072
1073 Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
1074 Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
1075
1076 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
1077}
1078
Tom Stellard75aadc22012-12-11 21:25:42 +00001079SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
1080 return DAG.getNode(
1081 ISD::SETCC,
Daniel Jasper48e93f72015-04-28 13:38:35 +00001082 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +00001083 MVT::i1,
Daniel Jasper48e93f72015-04-28 13:38:35 +00001084 Op, DAG.getConstantFP(0.0f, MVT::f32),
Tom Stellard75aadc22012-12-11 21:25:42 +00001085 DAG.getCondCode(ISD::SETNE)
1086 );
1087}
1088
Tom Stellard75aadc22012-12-11 21:25:42 +00001089SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001090 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001091 unsigned DwordOffset) const {
1092 unsigned ByteOffset = DwordOffset * 4;
1093 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +00001094 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +00001095
1096 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
1097 assert(isInt<16>(ByteOffset));
1098
1099 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
Daniel Jasper48e93f72015-04-28 13:38:35 +00001100 DAG.getConstant(ByteOffset, MVT::i32), // PTR
Tom Stellard75aadc22012-12-11 21:25:42 +00001101 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
1102 false, false, false, 0);
1103}
1104
Tom Stellard75aadc22012-12-11 21:25:42 +00001105bool R600TargetLowering::isZero(SDValue Op) const {
1106 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
1107 return Cst->isNullValue();
1108 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
1109 return CstFP->isZero();
1110 } else {
1111 return false;
1112 }
1113}
1114
/// Lower SELECT_CC into a form the R600 hardware can match.
///
/// Three strategies are attempted in order:
///   1. SET*  - the selected values are hardware true/false constants, so the
///      compare instruction's own result is the answer.
///   2. CND*  - one compare operand is zero, matching the conditional-move
///      instruction patterns.
///   3. Otherwise, materialize the condition as a hardware boolean with one
///      SELECT_CC and feed it into a second, supported SELECT_CC.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // First see whether the whole select collapses into a legacy min/max node.
  if (VT == MVT::f32) {
    DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
    SDValue MinMax = CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
    if (MinMax)
      return MinMax;
  }

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
     ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  // The operands are backwards (False is the HW-true value): invert the
  // condition, or invert-and-swap, whichever yields a legal condition code.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the conditon and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        // Inverting the condition also flips which operand is selected.
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types.  This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    // Not-equal conditions have no direct CND* form: invert the condition
    // and swap the select operands instead.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this for it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  // Pick the hardware boolean encoding that matches the compare type.
  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
1256
Alp Tokercb402912014-01-24 17:20:08 +00001257/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001258/// convert these pointers to a register index. Each register holds
1259/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
1260/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
1261/// for indirect addressing.
1262SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1263 unsigned StackWidth,
1264 SelectionDAG &DAG) const {
1265 unsigned SRLPad;
1266 switch(StackWidth) {
1267 case 1:
1268 SRLPad = 2;
1269 break;
1270 case 2:
1271 SRLPad = 3;
1272 break;
1273 case 4:
1274 SRLPad = 4;
1275 break;
1276 default: llvm_unreachable("Invalid stack width");
1277 }
1278
Daniel Jasper48e93f72015-04-28 13:38:35 +00001279 return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
1280 DAG.getConstant(SRLPad, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001281}
1282
1283void R600TargetLowering::getStackAddress(unsigned StackWidth,
1284 unsigned ElemIdx,
1285 unsigned &Channel,
1286 unsigned &PtrIncr) const {
1287 switch (StackWidth) {
1288 default:
1289 case 1:
1290 Channel = 0;
1291 if (ElemIdx > 0) {
1292 PtrIncr = 1;
1293 } else {
1294 PtrIncr = 0;
1295 }
1296 break;
1297 case 2:
1298 Channel = ElemIdx % 2;
1299 if (ElemIdx == 2) {
1300 PtrIncr = 1;
1301 } else {
1302 PtrIncr = 0;
1303 }
1304 break;
1305 case 4:
1306 Channel = ElemIdx;
1307 PtrIncr = 0;
1308 break;
1309 }
1310}
1311
Tom Stellard75aadc22012-12-11 21:25:42 +00001312SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001313 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001314 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1315 SDValue Chain = Op.getOperand(0);
1316 SDValue Value = Op.getOperand(1);
1317 SDValue Ptr = Op.getOperand(2);
1318
Tom Stellard2ffc3302013-08-26 15:05:44 +00001319 SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
Tom Stellardfbab8272013-08-16 01:12:11 +00001320 if (Result.getNode()) {
1321 return Result;
1322 }
1323
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001324 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
1325 if (StoreNode->isTruncatingStore()) {
1326 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001327 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001328 EVT MemVT = StoreNode->getMemoryVT();
1329 SDValue MaskConstant;
1330 if (MemVT == MVT::i8) {
Daniel Jasper48e93f72015-04-28 13:38:35 +00001331 MaskConstant = DAG.getConstant(0xFF, MVT::i32);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001332 } else {
1333 assert(MemVT == MVT::i16);
Daniel Jasper48e93f72015-04-28 13:38:35 +00001334 MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001335 }
1336 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
Daniel Jasper48e93f72015-04-28 13:38:35 +00001337 DAG.getConstant(2, MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001338 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
Daniel Jasper48e93f72015-04-28 13:38:35 +00001339 DAG.getConstant(0x00000003, VT));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001340 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1341 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
Daniel Jasper48e93f72015-04-28 13:38:35 +00001342 DAG.getConstant(3, VT));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001343 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1344 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
1345 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1346 // vector instead.
1347 SDValue Src[4] = {
1348 ShiftedValue,
Daniel Jasper48e93f72015-04-28 13:38:35 +00001349 DAG.getConstant(0, MVT::i32),
1350 DAG.getConstant(0, MVT::i32),
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001351 Mask
1352 };
Craig Topper48d114b2014-04-26 18:35:24 +00001353 SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001354 SDValue Args[3] = { Chain, Input, DWordAddr };
1355 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
Craig Topper206fcd42014-04-26 19:29:41 +00001356 Op->getVTList(), Args, MemVT,
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001357 StoreNode->getMemOperand());
1358 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
1359 Value.getValueType().bitsGE(MVT::i32)) {
1360 // Convert pointer from byte address to dword address.
1361 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1362 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
Daniel Jasper48e93f72015-04-28 13:38:35 +00001363 Ptr, DAG.getConstant(2, MVT::i32)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001364
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001365 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
Matt Arsenaulteaa3a7e2013-12-10 21:37:42 +00001366 llvm_unreachable("Truncated and indexed stores not supported yet");
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001367 } else {
1368 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1369 }
1370 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001371 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001372 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001373
1374 EVT ValueVT = Value.getValueType();
1375
1376 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1377 return SDValue();
1378 }
1379
Tom Stellarde9373602014-01-22 19:24:14 +00001380 SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
1381 if (Ret.getNode()) {
1382 return Ret;
1383 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001384 // Lowering for indirect addressing
1385
1386 const MachineFunction &MF = DAG.getMachineFunction();
Eric Christopher7792e322015-01-30 23:24:40 +00001387 const AMDGPUFrameLowering *TFL =
1388 static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001389 unsigned StackWidth = TFL->getStackWidth(MF);
1390
1391 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1392
1393 if (ValueVT.isVector()) {
1394 unsigned NumElemVT = ValueVT.getVectorNumElements();
1395 EVT ElemVT = ValueVT.getVectorElementType();
Craig Topper48d114b2014-04-26 18:35:24 +00001396 SmallVector<SDValue, 4> Stores(NumElemVT);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001397
1398 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1399 "vector width in load");
1400
1401 for (unsigned i = 0; i < NumElemVT; ++i) {
1402 unsigned Channel, PtrIncr;
1403 getStackAddress(StackWidth, i, Channel, PtrIncr);
1404 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
Daniel Jasper48e93f72015-04-28 13:38:35 +00001405 DAG.getConstant(PtrIncr, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001406 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
Daniel Jasper48e93f72015-04-28 13:38:35 +00001407 Value, DAG.getConstant(i, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001408
1409 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1410 Chain, Elem, Ptr,
Daniel Jasper48e93f72015-04-28 13:38:35 +00001411 DAG.getTargetConstant(Channel, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001412 }
Craig Topper48d114b2014-04-26 18:35:24 +00001413 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001414 } else {
1415 if (ValueVT == MVT::i8) {
1416 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1417 }
1418 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
Daniel Jasper48e93f72015-04-28 13:38:35 +00001419 DAG.getTargetConstant(0, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001420 }
1421
1422 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001423}
1424
Tom Stellard365366f2013-01-23 02:09:06 +00001425// return (512 + (kc_bank << 12)
1426static int
1427ConstantAddressBlock(unsigned AddressSpace) {
1428 switch (AddressSpace) {
1429 case AMDGPUAS::CONSTANT_BUFFER_0:
1430 return 512;
1431 case AMDGPUAS::CONSTANT_BUFFER_1:
1432 return 512 + 4096;
1433 case AMDGPUAS::CONSTANT_BUFFER_2:
1434 return 512 + 4096 * 2;
1435 case AMDGPUAS::CONSTANT_BUFFER_3:
1436 return 512 + 4096 * 3;
1437 case AMDGPUAS::CONSTANT_BUFFER_4:
1438 return 512 + 4096 * 4;
1439 case AMDGPUAS::CONSTANT_BUFFER_5:
1440 return 512 + 4096 * 5;
1441 case AMDGPUAS::CONSTANT_BUFFER_6:
1442 return 512 + 4096 * 6;
1443 case AMDGPUAS::CONSTANT_BUFFER_7:
1444 return 512 + 4096 * 7;
1445 case AMDGPUAS::CONSTANT_BUFFER_8:
1446 return 512 + 4096 * 8;
1447 case AMDGPUAS::CONSTANT_BUFFER_9:
1448 return 512 + 4096 * 9;
1449 case AMDGPUAS::CONSTANT_BUFFER_10:
1450 return 512 + 4096 * 10;
1451 case AMDGPUAS::CONSTANT_BUFFER_11:
1452 return 512 + 4096 * 11;
1453 case AMDGPUAS::CONSTANT_BUFFER_12:
1454 return 512 + 4096 * 12;
1455 case AMDGPUAS::CONSTANT_BUFFER_13:
1456 return 512 + 4096 * 13;
1457 case AMDGPUAS::CONSTANT_BUFFER_14:
1458 return 512 + 4096 * 14;
1459 case AMDGPUAS::CONSTANT_BUFFER_15:
1460 return 512 + 4096 * 15;
1461 default:
1462 return -1;
1463 }
1464}
1465
/// \brief Custom lowering for ISD::LOAD on R600.
///
/// Handles, in this order:
///  1. loads the common AMDGPU lowering already knows how to handle;
///  2. constant-address-space loads of global variables (turned into
///     REGISTER_LOADs of a dword-scaled pointer);
///  3. vector loads from local memory (scalarized);
///  4. constant-buffer (kcache) loads;
///  5. SEXT loads, expanded into EXTLOAD + SHL/SRA since only the
///     CONSTANT_BUFFER_0 path sign-extends natively (see comment below);
///  6. private-address loads via indirect register addressing.
/// Anything else returns SDValue() so the legalizer handles it.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  // Give the generic AMDGPU lowering the first shot; it returns a non-null
  // node when it handled the load, which we re-pair with the original chain.
  SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
  if (Ret.getNode()) {
    SDValue Ops[2] = {
      Ret,
      Chain
    };
    return DAG.getMergeValues(Ops, DL);
  }

  // Lower loads constant address space global variable loads
  if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
      isa<GlobalVariable>(GetUnderlyingObject(
          LoadNode->getMemOperand()->getValue(), *getDataLayout()))) {

    // Convert the byte pointer to a dword index (shift right by 2) in the
    // private-address pointer type, then emit an indirect register load.
    SDValue Ptr = DAG.getZExtOrTrunc(LoadNode->getBasePtr(), DL,
        getPointerTy(AMDGPUAS::PRIVATE_ADDRESS));
    Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
        DAG.getConstant(2, MVT::i32));
    return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op->getVTList(),
                       LoadNode->getChain(), Ptr,
                       DAG.getTargetConstant(0, MVT::i32), Op.getOperand(2));
  }

  // Vector loads from local memory are split into per-element loads.
  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      ScalarizeVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Constant-buffer (kcache) loads. Only non-extending and zero-extending
  // loads take this path; SEXT loads are expanded further below.
  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
        isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      // A scalar result still builds a v4i32 slot group; element 0 is
      // extracted after the if/else below.
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT,
                           makeArrayRef(Slots, NumElements));
    } else {
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
          DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    // Sign-extend manually: any-extend load, then shift left/right by the
    // width difference so the sign bit is replicated.
    SDValue ShiftAmount =
        DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                  LoadNode->getPointerInfo(), MemVT,
                                  LoadNode->isVolatile(),
                                  LoadNode->isNonTemporal(),
                                  LoadNode->isInvariant(),
                                  LoadNode->getAlignment());
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);

    SDValue MergedValues[2] = { Sra, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Everything past this point is private-address (scratch) lowering only.
  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL =
      static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    // One REGISTER_LOAD per element; getStackAddress picks the channel and
    // pointer increment for the configured stack width.
    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, MVT::i32),
                             Op.getOperand(2));
    }
    // Pad unused lanes up to a 4-element vector with undef.
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2] = {
    LoweredLoad,
    Chain
  };

  return DAG.getMergeValues(Ops, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001628
Matt Arsenault1d555c42014-06-23 18:00:55 +00001629SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1630 SDValue Chain = Op.getOperand(0);
1631 SDValue Cond = Op.getOperand(1);
1632 SDValue Jump = Op.getOperand(2);
1633
1634 return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1635 Chain, Jump, Cond);
1636}
1637
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
///
/// For compute shaders, each formal argument becomes a load from the
/// CONSTANT_BUFFER_0 kernarg area (offset by the 36-byte implicit header);
/// for other shader types arguments arrive in live-in 128-bit registers.
SDValue R600TargetLowering::LowerFormalArguments(
    SDValue Chain,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    SDLoc DL, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();

  SmallVector<ISD::InputArg, 8> LocalIns;

  // Recover the pre-legalization argument list, then run the calling
  // convention over it to assign locations/offsets into ArgLocs.
  // NOTE(review): the loop below indexes ArgLocs with i from Ins, i.e. it
  // assumes Ins and LocalIns have matching lengths/order — confirm.
  getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);

  AnalyzeFormalArguments(CCInfo, LocalIns);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    const ISD::InputArg &In = Ins[i];
    EVT VT = In.VT;
    EVT MemVT = VA.getLocVT();
    if (!VT.isVector() && MemVT.isVector()) {
      // Get load source type if scalarized.
      MemVT = MemVT.getVectorElementType();
    }

    // Non-compute shaders receive their inputs in live-in 128-bit registers
    // rather than via kernarg-buffer loads.
    if (MFI->getShaderType() != ShaderType::COMPUTE) {
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
    // being invalid. Somehow this seems to work with i64 arguments, but breaks
    // for <1 x i64>.

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.
    ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
    if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
      // FIXME: This should really check the extload type, but the handling of
      // extload vector parameters seems to be broken.

      // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
      Ext = ISD::SEXTLOAD;
    }

    // Compute the offset from the value.
    // XXX - I think PartOffset should give you this, but it seems to give the
    // size of the register which isn't useful.

    // ValBase: offset of the original (unsplit) argument; PartOffset: offset
    // of this part; their difference is the intra-argument byte offset used
    // for the MachinePointerInfo. Offset skips the 36-byte implicit header.
    unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
    unsigned PartOffset = VA.getLocMemOffset();
    unsigned Offset = 36 + VA.getLocMemOffset();

    MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
    SDValue Arg = DAG.getLoad(ISD::UNINDEXED, Ext, VT, DL, Chain,
                              DAG.getConstant(Offset, MVT::i32),
                              DAG.getUNDEF(MVT::i32),
                              PtrInfo,
                              MemVT, false, true, true, 4);

    // 4 is the preferred alignment for the CONSTANT memory space.
    InVals.push_back(Arg);
    // Track the running end of the argument area past this argument.
    MFI->ABIArgOffset = Offset + MemVT.getStoreSize();
  }
  return Chain;
}
1717
Matt Arsenault758659232013-05-18 00:21:46 +00001718EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001719 if (!VT.isVector())
1720 return MVT::i32;
Tom Stellard75aadc22012-12-11 21:25:42 +00001721 return VT.changeVectorElementTypeToInteger();
1722}
1723
/// \brief Rewrite a 4-element BUILD_VECTOR so that undef, constant-0/1 and
/// duplicated operands become UNDEF lanes, recording a replacement swizzle
/// selector for each rewritten lane in \p RemapSwizzle.
///
/// Selectors written to \p RemapSwizzle: 7 (SEL_MASK_WRITE) for undef lanes,
/// 4 (SEL_0) for zero constants, 5 (SEL_1) for 1.0 constants, and the index
/// of an earlier identical lane for duplicates.
/// \pre \p VectorEntry is a BUILD_VECTOR and \p RemapSwizzle is empty.
static SDValue CompactSwizzlableVector(
  SelectionDAG &DAG, SDValue VectorEntry,
  DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };

  for (unsigned i = 0; i < 4; i++) {
    // NOTE(review): this unbraced `if` guards only the RemapSwizzle
    // assignment; the constant-fold check below still runs for undef lanes
    // (harmless — dyn_cast of an UNDEF node yields null).
    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      // We mask write here to teach later passes that the ith element of this
      // vector is undef. Thus we can use it to reduce 128 bits reg usage,
      // break false dependencies and additionnaly make assembly easier to read.
      RemapSwizzle[i] = 7; // SEL_MASK_WRITE
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    // Lanes already turned to undef (by this pass or originally) cannot be
    // duplicates of a live lane; skip the duplicate scan for them.
    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      continue;
    // If this lane repeats an earlier lane, point its selector at the
    // earlier lane and free this one.
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec);
}
1766
Benjamin Kramer193960c2013-06-11 13:32:25 +00001767static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1768 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001769 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1770 assert(RemapSwizzle.empty());
1771 SDValue NewBldVec[4] = {
1772 VectorEntry.getOperand(0),
1773 VectorEntry.getOperand(1),
1774 VectorEntry.getOperand(2),
1775 VectorEntry.getOperand(3)
1776 };
1777 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001778 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001779 RemapSwizzle[i] = i;
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001780 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1781 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1782 ->getZExtValue();
1783 if (i == Idx)
1784 isUnmovable[Idx] = true;
1785 }
1786 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001787
1788 for (unsigned i = 0; i < 4; i++) {
1789 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1790 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1791 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001792 if (isUnmovable[Idx])
1793 continue;
1794 // Swap i and Idx
1795 std::swap(NewBldVec[Idx], NewBldVec[i]);
1796 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1797 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001798 }
1799 }
1800
1801 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001802 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001803}
1804
1805
1806SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
Daniel Jasper48e93f72015-04-28 13:38:35 +00001807SDValue Swz[4], SelectionDAG &DAG) const {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001808 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1809 // Old -> New swizzle values
1810 DenseMap<unsigned, unsigned> SwizzleRemap;
1811
1812 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1813 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001814 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001815 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Daniel Jasper48e93f72015-04-28 13:38:35 +00001816 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001817 }
1818
1819 SwizzleRemap.clear();
1820 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1821 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001822 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001823 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Daniel Jasper48e93f72015-04-28 13:38:35 +00001824 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001825 }
1826
1827 return BuildVector;
1828}
1829
1830
Tom Stellard75aadc22012-12-11 21:25:42 +00001831//===----------------------------------------------------------------------===//
1832// Custom DAG Optimizations
1833//===----------------------------------------------------------------------===//
1834
/// \brief Target-specific DAG combines for R600.
///
/// Handles FP_ROUND of UINT_TO_FP, the Mesa FP_TO_SINT(FNEG(SELECT_CC))
/// pattern, INSERT/EXTRACT_VECTOR_ELT folding into BUILD_VECTOR, nested
/// SELECT_CC folding, and swizzle optimization of EXPORT / TEXTURE_FETCH
/// arguments. All other opcodes defer to the AMDGPU common combines.
SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
  case ISD::FP_ROUND: {
      SDValue Arg = N->getOperand(0);
      if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
        return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
                           Arg.getOperand(0));
      }
      break;
    }

  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
  // (i32 select_cc f32, f32, -1, 0 cc)
  //
  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
  // this to one of the SET*_DX10 instructions.
  case ISD::FP_TO_SINT: {
    SDValue FNeg = N->getOperand(0);
    if (FNeg.getOpcode() != ISD::FNEG) {
      return SDValue();
    }
    SDValue SelectCC = FNeg.getOperand(0);
    if (SelectCC.getOpcode() != ISD::SELECT_CC ||
        SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
        SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
        !isHWTrueValue(SelectCC.getOperand(2)) ||
        !isHWFalseValue(SelectCC.getOperand(3))) {
      return SDValue();
    }

    return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
                           SelectCC.getOperand(0), // LHS
                           SelectCC.getOperand(1), // RHS
                           DAG.getConstant(-1, MVT::i32), // True
                           DAG.getConstant(0, MVT::i32), // False
                           SelectCC.getOperand(4)); // CC

    // NOTE(review): unreachable — the return above always executes.
    break;
  }

  // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
  // => build_vector elt0, ... , NewEltIdx, ... , eltN
  case ISD::INSERT_VECTOR_ELT: {
    SDValue InVec = N->getOperand(0);
    SDValue InVal = N->getOperand(1);
    SDValue EltNo = N->getOperand(2);
    SDLoc dl(N);

    // If the inserted element is an UNDEF, just use the input vector.
    if (InVal.getOpcode() == ISD::UNDEF)
      return InVec;

    EVT VT = InVec.getValueType();

    // If we can't generate a legal BUILD_VECTOR, exit
    if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
      return SDValue();

    // Check that we know which element is being inserted
    if (!isa<ConstantSDNode>(EltNo))
      return SDValue();
    unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

    // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
    // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
    // vector elements.
    SmallVector<SDValue, 8> Ops;
    if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
      Ops.append(InVec.getNode()->op_begin(),
                 InVec.getNode()->op_end());
    } else if (InVec.getOpcode() == ISD::UNDEF) {
      unsigned NElts = VT.getVectorNumElements();
      Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
    } else {
      return SDValue();
    }

    // Insert the element
    if (Elt < Ops.size()) {
      // All the operands of BUILD_VECTOR must have the same type;
      // we enforce that here.
      EVT OpVT = Ops[0].getValueType();
      if (InVal.getValueType() != OpVT)
        InVal = OpVT.bitsGT(InVal.getValueType()) ?
          DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
          DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
      Ops[Elt] = InVal;
    }

    // Return the new vector
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
  }

  // Extract_vec (Build_vector) generated by custom lowering
  // also needs to be customly combined
  case ISD::EXTRACT_VECTOR_ELT: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return Arg->getOperand(Element);
      }
    }
    if (Arg.getOpcode() == ISD::BITCAST &&
        Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
            Arg->getOperand(0).getOperand(Element));
      }
    }
    // NOTE(review): no break here — control falls through into the SELECT_CC
    // case below. For an EXTRACT_VECTOR_ELT node the SELECT_CC guards fail
    // and return SDValue(), so this looks benign, but confirm the fallthrough
    // is intentional before relying on it.
  }

  case ISD::SELECT_CC: {
    // Try common optimizations
    SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
    if (Ret.getNode())
      return Ret;

    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
    // selectcc x, y, a, b, inv(cc)
    //
    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
    // selectcc x, y, a, b, cc
    SDValue LHS = N->getOperand(0);
    if (LHS.getOpcode() != ISD::SELECT_CC) {
      return SDValue();
    }

    SDValue RHS = N->getOperand(1);
    SDValue True = N->getOperand(2);
    SDValue False = N->getOperand(3);
    ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();

    // The fold only applies when the outer select's true/false values are
    // exactly the inner select's, and the outer RHS is the false value.
    if (LHS.getOperand(2).getNode() != True.getNode() ||
        LHS.getOperand(3).getNode() != False.getNode() ||
        RHS.getNode() != False.getNode()) {
      return SDValue();
    }

    switch (NCC) {
    default: return SDValue();
    case ISD::SETNE: return LHS;
    case ISD::SETEQ: {
      // Invert the inner condition; only emit the folded select if the
      // inverted condition is legal (or we are still before legalization).
      ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
      LHSCC = ISD::getSetCCInverse(LHSCC,
                                  LHS.getOperand(0).getValueType().isInteger());
      if (DCI.isBeforeLegalizeOps() ||
          isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
        return DAG.getSelectCC(SDLoc(N),
                               LHS.getOperand(0),
                               LHS.getOperand(1),
                               LHS.getOperand(2),
                               LHS.getOperand(3),
                               LHSCC);
      break;
    }
    }
    return SDValue();
  }

  case AMDGPUISD::EXPORT: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    // Rebuild the EXPORT with an optimized swizzle; operand 1 and the four
    // SWZ_* selectors are rewritten together by OptimizeSwizzle.
    SDValue NewArgs[8] = {
      N->getOperand(0), // Chain
      SDValue(),
      N->getOperand(2), // ArrayBase
      N->getOperand(3), // Type
      N->getOperand(4), // SWZ_X
      N->getOperand(5), // SWZ_Y
      N->getOperand(6), // SWZ_Z
      N->getOperand(7) // SWZ_W
    };
    SDLoc DL(N);
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
    return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
  }
  case AMDGPUISD::TEXTURE_FETCH: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    // Same as the EXPORT case: rebuild with an optimized swizzle, with the
    // selectors starting at operand index 2.
    SDValue NewArgs[19] = {
      N->getOperand(0),
      N->getOperand(1),
      N->getOperand(2),
      N->getOperand(3),
      N->getOperand(4),
      N->getOperand(5),
      N->getOperand(6),
      N->getOperand(7),
      N->getOperand(8),
      N->getOperand(9),
      N->getOperand(10),
      N->getOperand(11),
      N->getOperand(12),
      N->getOperand(13),
      N->getOperand(14),
      N->getOperand(15),
      N->getOperand(16),
      N->getOperand(17),
      N->getOperand(18),
    };
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
    return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
        NewArgs);
  }
  }

  return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002054
2055static bool
2056FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002057 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Eric Christopherfc6de422014-08-05 02:39:49 +00002058 const R600InstrInfo *TII =
2059 static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002060 if (!Src.isMachineOpcode())
2061 return false;
2062 switch (Src.getMachineOpcode()) {
2063 case AMDGPU::FNEG_R600:
2064 if (!Neg.getNode())
2065 return false;
2066 Src = Src.getOperand(0);
Daniel Jasper48e93f72015-04-28 13:38:35 +00002067 Neg = DAG.getTargetConstant(1, MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002068 return true;
2069 case AMDGPU::FABS_R600:
2070 if (!Abs.getNode())
2071 return false;
2072 Src = Src.getOperand(0);
Daniel Jasper48e93f72015-04-28 13:38:35 +00002073 Abs = DAG.getTargetConstant(1, MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002074 return true;
2075 case AMDGPU::CONST_COPY: {
2076 unsigned Opcode = ParentNode->getMachineOpcode();
2077 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2078
2079 if (!Sel.getNode())
2080 return false;
2081
2082 SDValue CstOffset = Src.getOperand(0);
2083 if (ParentNode->getValueType(0).isVector())
2084 return false;
2085
2086 // Gather constants values
2087 int SrcIndices[] = {
2088 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2089 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2090 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
2091 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2092 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2093 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2094 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2095 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2096 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2097 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2098 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
2099 };
2100 std::vector<unsigned> Consts;
Matt Arsenault4d64f962014-05-12 19:23:21 +00002101 for (int OtherSrcIdx : SrcIndices) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002102 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2103 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2104 continue;
2105 if (HasDst) {
2106 OtherSrcIdx--;
2107 OtherSelIdx--;
2108 }
2109 if (RegisterSDNode *Reg =
2110 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2111 if (Reg->getReg() == AMDGPU::ALU_CONST) {
Matt Arsenaultb3ee3882014-05-12 19:26:38 +00002112 ConstantSDNode *Cst
2113 = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002114 Consts.push_back(Cst->getZExtValue());
2115 }
2116 }
2117 }
2118
Matt Arsenault37c12d72014-05-12 20:42:57 +00002119 ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002120 Consts.push_back(Cst->getZExtValue());
2121 if (!TII->fitsConstReadLimitations(Consts)) {
2122 return false;
2123 }
2124
2125 Sel = CstOffset;
2126 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
2127 return true;
2128 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002129 case AMDGPU::MOV_IMM_I32:
2130 case AMDGPU::MOV_IMM_F32: {
2131 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
2132 uint64_t ImmValue = 0;
2133
2134
2135 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
2136 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
2137 float FloatValue = FPC->getValueAPF().convertToFloat();
2138 if (FloatValue == 0.0) {
2139 ImmReg = AMDGPU::ZERO;
2140 } else if (FloatValue == 0.5) {
2141 ImmReg = AMDGPU::HALF;
2142 } else if (FloatValue == 1.0) {
2143 ImmReg = AMDGPU::ONE;
2144 } else {
2145 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2146 }
2147 } else {
2148 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
2149 uint64_t Value = C->getZExtValue();
2150 if (Value == 0) {
2151 ImmReg = AMDGPU::ZERO;
2152 } else if (Value == 1) {
2153 ImmReg = AMDGPU::ONE_INT;
2154 } else {
2155 ImmValue = Value;
2156 }
2157 }
2158
2159 // Check that we aren't already using an immediate.
2160 // XXX: It's possible for an instruction to have more than one
2161 // immediate operand, but this is not supported yet.
2162 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
2163 if (!Imm.getNode())
2164 return false;
2165 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
2166 assert(C);
2167 if (C->getZExtValue())
2168 return false;
Daniel Jasper48e93f72015-04-28 13:38:35 +00002169 Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002170 }
2171 Src = DAG.getRegister(ImmReg, MVT::i32);
2172 return true;
2173 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002174 default:
2175 return false;
2176 }
2177}
2178
2179
/// \brief Fold the instructions after selecting them.
///
/// Walks the operands of the freshly selected machine node \p Node and tries
/// to absorb modifier/constant producers (FNEG_R600, FABS_R600, CONST_COPY,
/// MOV_IMM_*) into the node's own operand fields via FoldOperand(). Four
/// shapes are handled: DOT_4 (eight per-channel sources), REG_SEQUENCE
/// (vector element sources), CLAMP_R600 (folded into the defining
/// instruction's clamp modifier), and plain ALU instructions with modifiers.
///
/// \returns a re-created node when a fold happened, otherwise \p Node
///          unchanged.
SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
                                            SelectionDAG &DAG) const {
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
  if (!Node->isMachineOpcode())
    return Node;
  unsigned Opcode = Node->getMachineOpcode();
  // Placeholder passed to FoldOperand for modifier slots the node lacks
  // (FoldOperand treats a node-less SDValue as "field not present").
  SDValue FakeOp;

  // Mutable copy of the operand list; FoldOperand edits entries in place.
  std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());

  if (Opcode == AMDGPU::DOT_4) {
    // Eight independent scalar sources (src0/src1 times X/Y/Z/W), each with
    // its own neg and abs modifier operand.
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
    };
    for (unsigned i = 0; i < 8; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      // The "- 1" converts a MachineInstr operand index (which counts the
      // dst register) into an SDNode operand index (which does not).
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue &Abs = Ops[AbsIdx[i] - 1];
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      if (HasDst)
        SelIdx--;
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      // DOT_4 has no literal slot, so pass FakeOp for Imm.
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::REG_SEQUENCE) {
    // Operands alternate (value, subreg index) after the chain; only the
    // value operands (odd indices) are foldable, and no modifiers exist.
    for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
      SDValue &Src = Ops[i];
      if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::CLAMP_R600) {
    // Fold CLAMP into the instruction that computes its input by setting
    // that instruction's clamp modifier, when it has one.
    SDValue Src = Node->getOperand(0);
    if (!Src.isMachineOpcode() ||
        !TII->hasInstrModifiers(Src.getMachineOpcode()))
      return Node;
    int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
        AMDGPU::OpName::clamp);
    if (ClampIdx < 0)
      return Node;
    // Re-emit the defining instruction with clamp = 1 (again "- 1" to skip
    // the dst operand); the CLAMP node itself disappears.
    std::vector<SDValue> Ops(Src->op_begin(), Src->op_end());
    Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
    return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
        Node->getVTList(), Ops);
  } else {
    // Generic ALU instruction: up to three sources with neg modifiers,
    // abs on src0/src1 only (hence the -1 sentinel), plus a literal slot.
    if (!TII->hasInstrModifiers(Opcode))
      return Node;
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
      -1
    };
    for (unsigned i = 0; i < 3; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue FakeAbs;
      SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
      if (HasDst) {
        // Adjust MachineInstr indices for the dst operand missing from the
        // SDNode operand list.
        SelIdx--;
        ImmIdx--;
      }
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      SDValue &Imm = Ops[ImmIdx];
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  }

  return Node;
}