blob: ecda332ed6647ca9ba5f49e609f258c7eba64f96 [file] [log] [blame]
//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000016#include "AMDGPUFrameLowering.h"
Matt Arsenaultc791f392014-06-23 18:00:31 +000017#include "AMDGPUIntrinsicInfo.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000018#include "AMDGPUSubtarget.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000019#include "R600Defines.h"
20#include "R600InstrInfo.h"
21#include "R600MachineFunctionInfo.h"
Tom Stellard067c8152014-07-21 14:01:14 +000022#include "llvm/Analysis/ValueTracking.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000023#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000024#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000025#include "llvm/CodeGen/MachineInstrBuilder.h"
26#include "llvm/CodeGen/MachineRegisterInfo.h"
27#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000028#include "llvm/IR/Argument.h"
29#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000030
31using namespace llvm;
32
Eric Christopher7792e322015-01-30 23:24:40 +000033R600TargetLowering::R600TargetLowering(TargetMachine &TM,
34 const AMDGPUSubtarget &STI)
35 : AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000036 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
37 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
38 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
39 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000040 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
41 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
42
Eric Christopher23a3a7c2015-02-26 00:00:24 +000043 computeRegisterProperties(STI.getRegisterInfo());
Tom Stellard75aadc22012-12-11 21:25:42 +000044
Tom Stellard0351ea22013-09-28 02:50:50 +000045 // Set condition code actions
46 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
47 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000048 setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000049 setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000050 setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
51 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000052 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
53 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
54 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
55 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000056 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
57 setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
58
59 setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
60 setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
61 setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
62 setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
63
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000064 setOperationAction(ISD::FCOS, MVT::f32, Custom);
65 setOperationAction(ISD::FSIN, MVT::f32, Custom);
66
Tom Stellard75aadc22012-12-11 21:25:42 +000067 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000068 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000069
Tom Stellard492ebea2013-03-08 15:37:07 +000070 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
71 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Matt Arsenault1d555c42014-06-23 18:00:55 +000072 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000073
74 setOperationAction(ISD::FSUB, MVT::f32, Expand);
75
76 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
77 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
78 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000079
Tom Stellard75aadc22012-12-11 21:25:42 +000080 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
81 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
82
Tom Stellarde8f9f282013-03-08 15:37:05 +000083 setOperationAction(ISD::SETCC, MVT::i32, Expand);
84 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000085 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
Jan Vesely2cb62ce2014-07-10 22:40:21 +000086 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
87 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000088
Tom Stellard53f2f902013-09-05 18:38:03 +000089 setOperationAction(ISD::SELECT, MVT::i32, Expand);
90 setOperationAction(ISD::SELECT, MVT::f32, Expand);
91 setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
Tom Stellard53f2f902013-09-05 18:38:03 +000092 setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000093
Jan Vesely808fff52015-04-30 17:15:56 +000094 // ADD, SUB overflow.
95 // TODO: turn these into Legal?
96 if (Subtarget->hasCARRY())
97 setOperationAction(ISD::UADDO, MVT::i32, Custom);
98
99 if (Subtarget->hasBORROW())
100 setOperationAction(ISD::USUBO, MVT::i32, Custom);
101
Matt Arsenault4e466652014-04-16 01:41:30 +0000102 // Expand sign extension of vectors
103 if (!Subtarget->hasBFE())
104 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
105
106 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
107 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);
108
109 if (!Subtarget->hasBFE())
110 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
111 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
112 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);
113
114 if (!Subtarget->hasBFE())
115 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
116 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
117 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);
118
119 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
120 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
121 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);
122
123 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
124
125
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000126 // Legalize loads and stores to the private address space.
127 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000128 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000129 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000130
131 // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
132 // spaces, so it is custom lowered to handle those where it isn't.
Ahmed Bougacha2b6917b2015-01-08 00:51:32 +0000133 for (MVT VT : MVT::integer_valuetypes()) {
134 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
135 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
136 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);
Matt Arsenault2a495972014-11-23 02:57:54 +0000137
Ahmed Bougacha2b6917b2015-01-08 00:51:32 +0000138 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
139 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
140 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);
Matt Arsenault2a495972014-11-23 02:57:54 +0000141
Ahmed Bougacha2b6917b2015-01-08 00:51:32 +0000142 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
143 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
144 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
145 }
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000146
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000147 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000148 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000149 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000150 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
Tom Stellardd3ee8c12013-08-16 01:12:06 +0000151 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
152 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000153
Tom Stellard365366f2013-01-23 02:09:06 +0000154 setOperationAction(ISD::LOAD, MVT::i32, Custom);
155 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000156 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
157
Tom Stellard880a80a2014-06-17 16:53:14 +0000158 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
159 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
160 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
161 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
162
163 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
164 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
165 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
166 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
167
Tom Stellard75aadc22012-12-11 21:25:42 +0000168 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +0000169 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +0000170 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +0000171 setTargetDAGCombine(ISD::SELECT_CC);
Quentin Colombete2e05482013-07-30 00:27:16 +0000172 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000173
Jan Vesely25f36272014-06-18 12:27:13 +0000174 // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
175 // to be Legal/Custom in order to avoid library calls.
176 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
Jan Vesely900ff2e2014-06-18 12:27:15 +0000177 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
Jan Veselyecf51332014-06-18 12:27:17 +0000178 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
Jan Vesely25f36272014-06-18 12:27:13 +0000179
Michel Danzer49812b52013-07-10 16:37:07 +0000180 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
181
Matt Arsenaultc4d3d3a2014-06-23 18:00:49 +0000182 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
183 for (MVT VT : ScalarIntVTs) {
184 setOperationAction(ISD::ADDC, VT, Expand);
185 setOperationAction(ISD::SUBC, VT, Expand);
186 setOperationAction(ISD::ADDE, VT, Expand);
187 setOperationAction(ISD::SUBE, VT, Expand);
188 }
189
Tom Stellardfc455472013-08-12 22:33:21 +0000190 setSchedulingPreference(Sched::Source);
Tom Stellard75aadc22012-12-11 21:25:42 +0000191}
192
193MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
194 MachineInstr * MI, MachineBasicBlock * BB) const {
195 MachineFunction * MF = BB->getParent();
196 MachineRegisterInfo &MRI = MF->getRegInfo();
197 MachineBasicBlock::iterator I = *MI;
Eric Christopherfc6de422014-08-05 02:39:49 +0000198 const R600InstrInfo *TII =
Eric Christopher7792e322015-01-30 23:24:40 +0000199 static_cast<const R600InstrInfo *>(Subtarget->getInstrInfo());
Tom Stellard75aadc22012-12-11 21:25:42 +0000200
201 switch (MI->getOpcode()) {
Tom Stellardc6f4a292013-08-26 15:05:59 +0000202 default:
Tom Stellard8f9fc202013-11-15 00:12:45 +0000203 // Replace LDS_*_RET instruction that don't have any uses with the
204 // equivalent LDS_*_NORET instruction.
205 if (TII->isLDSRetInstr(MI->getOpcode())) {
Tom Stellard13c68ef2013-09-05 18:38:09 +0000206 int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
207 assert(DstIdx != -1);
208 MachineInstrBuilder NewMI;
Aaron Watry1885e532014-09-11 15:02:54 +0000209 // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
210 // LDS_1A2D support and remove this special case.
211 if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()) ||
212 MI->getOpcode() == AMDGPU::LDS_CMPST_RET)
Tom Stellard8f9fc202013-11-15 00:12:45 +0000213 return BB;
214
215 NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
216 TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
Tom Stellardc6f4a292013-08-26 15:05:59 +0000217 for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
218 NewMI.addOperand(MI->getOperand(i));
219 }
Tom Stellardc6f4a292013-08-26 15:05:59 +0000220 } else {
221 return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
222 }
223 break;
Tom Stellard75aadc22012-12-11 21:25:42 +0000224 case AMDGPU::CLAMP_R600: {
225 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
226 AMDGPU::MOV,
227 MI->getOperand(0).getReg(),
228 MI->getOperand(1).getReg());
229 TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
230 break;
231 }
232
233 case AMDGPU::FABS_R600: {
234 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
235 AMDGPU::MOV,
236 MI->getOperand(0).getReg(),
237 MI->getOperand(1).getReg());
238 TII->addFlag(NewMI, 0, MO_FLAG_ABS);
239 break;
240 }
241
242 case AMDGPU::FNEG_R600: {
243 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
244 AMDGPU::MOV,
245 MI->getOperand(0).getReg(),
246 MI->getOperand(1).getReg());
247 TII->addFlag(NewMI, 0, MO_FLAG_NEG);
248 break;
249 }
250
Tom Stellard75aadc22012-12-11 21:25:42 +0000251 case AMDGPU::MASK_WRITE: {
252 unsigned maskedRegister = MI->getOperand(0).getReg();
253 assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
254 MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
255 TII->addFlag(defInstr, 0, MO_FLAG_MASK);
256 break;
257 }
258
259 case AMDGPU::MOV_IMM_F32:
260 TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
261 MI->getOperand(1).getFPImm()->getValueAPF()
262 .bitcastToAPInt().getZExtValue());
263 break;
264 case AMDGPU::MOV_IMM_I32:
265 TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
266 MI->getOperand(1).getImm());
267 break;
Vincent Lejeune0b72f102013-03-05 15:04:55 +0000268 case AMDGPU::CONST_COPY: {
269 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
270 MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
Tom Stellard02661d92013-06-25 21:22:18 +0000271 TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
Vincent Lejeune0b72f102013-03-05 15:04:55 +0000272 MI->getOperand(1).getImm());
273 break;
274 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000275
276 case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
Tom Stellard0344cdf2013-08-01 15:23:42 +0000277 case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
Tom Stellard75aadc22012-12-11 21:25:42 +0000278 case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +0000279 unsigned EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
Tom Stellard75aadc22012-12-11 21:25:42 +0000280
281 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
282 .addOperand(MI->getOperand(0))
283 .addOperand(MI->getOperand(1))
284 .addImm(EOP); // Set End of program bit
285 break;
286 }
287
Tom Stellard75aadc22012-12-11 21:25:42 +0000288 case AMDGPU::TXD: {
289 unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
290 unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000291 MachineOperand &RID = MI->getOperand(4);
292 MachineOperand &SID = MI->getOperand(5);
293 unsigned TextureId = MI->getOperand(6).getImm();
294 unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
295 unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
Tom Stellard75aadc22012-12-11 21:25:42 +0000296
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000297 switch (TextureId) {
298 case 5: // Rect
299 CTX = CTY = 0;
300 break;
301 case 6: // Shadow1D
302 SrcW = SrcZ;
303 break;
304 case 7: // Shadow2D
305 SrcW = SrcZ;
306 break;
307 case 8: // ShadowRect
308 CTX = CTY = 0;
309 SrcW = SrcZ;
310 break;
311 case 9: // 1DArray
312 SrcZ = SrcY;
313 CTZ = 0;
314 break;
315 case 10: // 2DArray
316 CTZ = 0;
317 break;
318 case 11: // Shadow1DArray
319 SrcZ = SrcY;
320 CTZ = 0;
321 break;
322 case 12: // Shadow2DArray
323 CTZ = 0;
324 break;
325 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000326 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
327 .addOperand(MI->getOperand(3))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000328 .addImm(SrcX)
329 .addImm(SrcY)
330 .addImm(SrcZ)
331 .addImm(SrcW)
332 .addImm(0)
333 .addImm(0)
334 .addImm(0)
335 .addImm(0)
336 .addImm(1)
337 .addImm(2)
338 .addImm(3)
339 .addOperand(RID)
340 .addOperand(SID)
341 .addImm(CTX)
342 .addImm(CTY)
343 .addImm(CTZ)
344 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000345 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
346 .addOperand(MI->getOperand(2))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000347 .addImm(SrcX)
348 .addImm(SrcY)
349 .addImm(SrcZ)
350 .addImm(SrcW)
351 .addImm(0)
352 .addImm(0)
353 .addImm(0)
354 .addImm(0)
355 .addImm(1)
356 .addImm(2)
357 .addImm(3)
358 .addOperand(RID)
359 .addOperand(SID)
360 .addImm(CTX)
361 .addImm(CTY)
362 .addImm(CTZ)
363 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000364 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
365 .addOperand(MI->getOperand(0))
366 .addOperand(MI->getOperand(1))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000367 .addImm(SrcX)
368 .addImm(SrcY)
369 .addImm(SrcZ)
370 .addImm(SrcW)
371 .addImm(0)
372 .addImm(0)
373 .addImm(0)
374 .addImm(0)
375 .addImm(1)
376 .addImm(2)
377 .addImm(3)
378 .addOperand(RID)
379 .addOperand(SID)
380 .addImm(CTX)
381 .addImm(CTY)
382 .addImm(CTZ)
383 .addImm(CTW)
Tom Stellard75aadc22012-12-11 21:25:42 +0000384 .addReg(T0, RegState::Implicit)
385 .addReg(T1, RegState::Implicit);
386 break;
387 }
388
389 case AMDGPU::TXD_SHADOW: {
390 unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
391 unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000392 MachineOperand &RID = MI->getOperand(4);
393 MachineOperand &SID = MI->getOperand(5);
394 unsigned TextureId = MI->getOperand(6).getImm();
395 unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
396 unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
397
398 switch (TextureId) {
399 case 5: // Rect
400 CTX = CTY = 0;
401 break;
402 case 6: // Shadow1D
403 SrcW = SrcZ;
404 break;
405 case 7: // Shadow2D
406 SrcW = SrcZ;
407 break;
408 case 8: // ShadowRect
409 CTX = CTY = 0;
410 SrcW = SrcZ;
411 break;
412 case 9: // 1DArray
413 SrcZ = SrcY;
414 CTZ = 0;
415 break;
416 case 10: // 2DArray
417 CTZ = 0;
418 break;
419 case 11: // Shadow1DArray
420 SrcZ = SrcY;
421 CTZ = 0;
422 break;
423 case 12: // Shadow2DArray
424 CTZ = 0;
425 break;
426 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000427
428 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
429 .addOperand(MI->getOperand(3))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000430 .addImm(SrcX)
431 .addImm(SrcY)
432 .addImm(SrcZ)
433 .addImm(SrcW)
434 .addImm(0)
435 .addImm(0)
436 .addImm(0)
437 .addImm(0)
438 .addImm(1)
439 .addImm(2)
440 .addImm(3)
441 .addOperand(RID)
442 .addOperand(SID)
443 .addImm(CTX)
444 .addImm(CTY)
445 .addImm(CTZ)
446 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000447 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
448 .addOperand(MI->getOperand(2))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000449 .addImm(SrcX)
450 .addImm(SrcY)
451 .addImm(SrcZ)
452 .addImm(SrcW)
453 .addImm(0)
454 .addImm(0)
455 .addImm(0)
456 .addImm(0)
457 .addImm(1)
458 .addImm(2)
459 .addImm(3)
460 .addOperand(RID)
461 .addOperand(SID)
462 .addImm(CTX)
463 .addImm(CTY)
464 .addImm(CTZ)
465 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000466 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
467 .addOperand(MI->getOperand(0))
468 .addOperand(MI->getOperand(1))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000469 .addImm(SrcX)
470 .addImm(SrcY)
471 .addImm(SrcZ)
472 .addImm(SrcW)
473 .addImm(0)
474 .addImm(0)
475 .addImm(0)
476 .addImm(0)
477 .addImm(1)
478 .addImm(2)
479 .addImm(3)
480 .addOperand(RID)
481 .addOperand(SID)
482 .addImm(CTX)
483 .addImm(CTY)
484 .addImm(CTZ)
485 .addImm(CTW)
Tom Stellard75aadc22012-12-11 21:25:42 +0000486 .addReg(T0, RegState::Implicit)
487 .addReg(T1, RegState::Implicit);
488 break;
489 }
490
491 case AMDGPU::BRANCH:
492 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000493 .addOperand(MI->getOperand(0));
Tom Stellard75aadc22012-12-11 21:25:42 +0000494 break;
495
496 case AMDGPU::BRANCH_COND_f32: {
497 MachineInstr *NewMI =
498 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
499 AMDGPU::PREDICATE_BIT)
500 .addOperand(MI->getOperand(1))
501 .addImm(OPCODE_IS_NOT_ZERO)
502 .addImm(0); // Flags
503 TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000504 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
Tom Stellard75aadc22012-12-11 21:25:42 +0000505 .addOperand(MI->getOperand(0))
506 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
507 break;
508 }
509
510 case AMDGPU::BRANCH_COND_i32: {
511 MachineInstr *NewMI =
512 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
513 AMDGPU::PREDICATE_BIT)
514 .addOperand(MI->getOperand(1))
515 .addImm(OPCODE_IS_NOT_ZERO_INT)
516 .addImm(0); // Flags
517 TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000518 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
Tom Stellard75aadc22012-12-11 21:25:42 +0000519 .addOperand(MI->getOperand(0))
520 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
521 break;
522 }
523
Tom Stellard75aadc22012-12-11 21:25:42 +0000524 case AMDGPU::EG_ExportSwz:
525 case AMDGPU::R600_ExportSwz: {
Tom Stellard6f1b8652013-01-23 21:39:49 +0000526 // Instruction is left unmodified if its not the last one of its type
527 bool isLastInstructionOfItsType = true;
528 unsigned InstExportType = MI->getOperand(1).getImm();
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +0000529 for (MachineBasicBlock::iterator NextExportInst = std::next(I),
Tom Stellard6f1b8652013-01-23 21:39:49 +0000530 EndBlock = BB->end(); NextExportInst != EndBlock;
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +0000531 NextExportInst = std::next(NextExportInst)) {
Tom Stellard6f1b8652013-01-23 21:39:49 +0000532 if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
533 NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
534 unsigned CurrentInstExportType = NextExportInst->getOperand(1)
535 .getImm();
536 if (CurrentInstExportType == InstExportType) {
537 isLastInstructionOfItsType = false;
538 break;
539 }
540 }
541 }
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +0000542 bool EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
Tom Stellard6f1b8652013-01-23 21:39:49 +0000543 if (!EOP && !isLastInstructionOfItsType)
Tom Stellard75aadc22012-12-11 21:25:42 +0000544 return BB;
545 unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
546 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
547 .addOperand(MI->getOperand(0))
548 .addOperand(MI->getOperand(1))
549 .addOperand(MI->getOperand(2))
550 .addOperand(MI->getOperand(3))
551 .addOperand(MI->getOperand(4))
552 .addOperand(MI->getOperand(5))
553 .addOperand(MI->getOperand(6))
554 .addImm(CfInst)
Tom Stellard6f1b8652013-01-23 21:39:49 +0000555 .addImm(EOP);
Tom Stellard75aadc22012-12-11 21:25:42 +0000556 break;
557 }
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000558 case AMDGPU::RETURN: {
559 // RETURN instructions must have the live-out registers as implicit uses,
560 // otherwise they appear dead.
561 R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
562 MachineInstrBuilder MIB(*MF, MI);
563 for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
564 MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
565 return BB;
566 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000567 }
568
569 MI->eraseFromParent();
570 return BB;
571}
572
573//===----------------------------------------------------------------------===//
574// Custom DAG Lowering Operations
575//===----------------------------------------------------------------------===//
576
Tom Stellard75aadc22012-12-11 21:25:42 +0000577SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000578 MachineFunction &MF = DAG.getMachineFunction();
579 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard75aadc22012-12-11 21:25:42 +0000580 switch (Op.getOpcode()) {
581 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Tom Stellard880a80a2014-06-17 16:53:14 +0000582 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
583 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
Jan Vesely25f36272014-06-18 12:27:13 +0000584 case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
Jan Veselyecf51332014-06-18 12:27:17 +0000585 case ISD::SRA_PARTS:
Jan Vesely900ff2e2014-06-18 12:27:15 +0000586 case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
Jan Vesely808fff52015-04-30 17:15:56 +0000587 case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
588 case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000589 case ISD::FCOS:
590 case ISD::FSIN: return LowerTrig(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000591 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000592 case ISD::STORE: return LowerSTORE(Op, DAG);
Matt Arsenaultd2c9e082014-07-07 18:34:45 +0000593 case ISD::LOAD: {
594 SDValue Result = LowerLOAD(Op, DAG);
595 assert((!Result.getNode() ||
596 Result.getNode()->getNumValues() == 2) &&
597 "Load should return a value and a chain");
598 return Result;
599 }
600
Matt Arsenault1d555c42014-06-23 18:00:55 +0000601 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000602 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000603 case ISD::INTRINSIC_VOID: {
604 SDValue Chain = Op.getOperand(0);
605 unsigned IntrinsicID =
606 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
607 switch (IntrinsicID) {
608 case AMDGPUIntrinsic::AMDGPU_store_output: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000609 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
610 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000611 MFI->LiveOuts.push_back(Reg);
Andrew Trickef9de2a2013-05-25 02:42:55 +0000612 return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
Tom Stellard75aadc22012-12-11 21:25:42 +0000613 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000614 case AMDGPUIntrinsic::R600_store_swizzle: {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000615 SDLoc DL(Op);
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000616 const SDValue Args[8] = {
617 Chain,
618 Op.getOperand(2), // Export Value
619 Op.getOperand(3), // ArrayBase
620 Op.getOperand(4), // Type
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000621 DAG.getConstant(0, DL, MVT::i32), // SWZ_X
622 DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
623 DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
624 DAG.getConstant(3, DL, MVT::i32) // SWZ_W
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000625 };
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000626 return DAG.getNode(AMDGPUISD::EXPORT, DL, Op.getValueType(), Args);
Tom Stellard75aadc22012-12-11 21:25:42 +0000627 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000628
Tom Stellard75aadc22012-12-11 21:25:42 +0000629 // default for switch(IntrinsicID)
630 default: break;
631 }
632 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
633 break;
634 }
635 case ISD::INTRINSIC_WO_CHAIN: {
636 unsigned IntrinsicID =
637 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
638 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000639 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000640 switch(IntrinsicID) {
641 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000642 case AMDGPUIntrinsic::R600_load_input: {
643 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
644 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
645 MachineFunction &MF = DAG.getMachineFunction();
646 MachineRegisterInfo &MRI = MF.getRegInfo();
647 MRI.addLiveIn(Reg);
648 return DAG.getCopyFromReg(DAG.getEntryNode(),
649 SDLoc(DAG.getEntryNode()), Reg, VT);
650 }
651
652 case AMDGPUIntrinsic::R600_interp_input: {
653 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
654 int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
655 MachineSDNode *interp;
656 if (ijb < 0) {
Eric Christopher7792e322015-01-30 23:24:40 +0000657 const R600InstrInfo *TII =
658 static_cast<const R600InstrInfo *>(Subtarget->getInstrInfo());
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000659 interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000660 MVT::v4f32, DAG.getTargetConstant(slot / 4, DL, MVT::i32));
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000661 return DAG.getTargetExtractSubreg(
662 TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
663 DL, MVT::f32, SDValue(interp, 0));
664 }
665 MachineFunction &MF = DAG.getMachineFunction();
666 MachineRegisterInfo &MRI = MF.getRegInfo();
667 unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
668 unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
669 MRI.addLiveIn(RegisterI);
670 MRI.addLiveIn(RegisterJ);
671 SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
672 SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
673 SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
674 SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);
675
676 if (slot % 4 < 2)
677 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000678 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4, DL, MVT::i32),
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000679 RegisterJNode, RegisterINode);
680 else
681 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000682 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4, DL, MVT::i32),
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000683 RegisterJNode, RegisterINode);
684 return SDValue(interp, slot % 2);
685 }
Vincent Lejeunef143af32013-11-11 22:10:24 +0000686 case AMDGPUIntrinsic::R600_interp_xy:
687 case AMDGPUIntrinsic::R600_interp_zw: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000688 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Tom Stellard41afe6a2013-02-05 17:09:14 +0000689 MachineSDNode *interp;
Vincent Lejeunef143af32013-11-11 22:10:24 +0000690 SDValue RegisterINode = Op.getOperand(2);
691 SDValue RegisterJNode = Op.getOperand(3);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000692
Vincent Lejeunef143af32013-11-11 22:10:24 +0000693 if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
Tom Stellard41afe6a2013-02-05 17:09:14 +0000694 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000695 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000696 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000697 else
698 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000699 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000700 RegisterJNode, RegisterINode);
Vincent Lejeunef143af32013-11-11 22:10:24 +0000701 return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
702 SDValue(interp, 0), SDValue(interp, 1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000703 }
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000704 case AMDGPUIntrinsic::R600_tex:
705 case AMDGPUIntrinsic::R600_texc:
706 case AMDGPUIntrinsic::R600_txl:
707 case AMDGPUIntrinsic::R600_txlc:
708 case AMDGPUIntrinsic::R600_txb:
709 case AMDGPUIntrinsic::R600_txbc:
710 case AMDGPUIntrinsic::R600_txf:
711 case AMDGPUIntrinsic::R600_txq:
712 case AMDGPUIntrinsic::R600_ddx:
Vincent Lejeune6df39432013-10-02 16:00:33 +0000713 case AMDGPUIntrinsic::R600_ddy:
714 case AMDGPUIntrinsic::R600_ldptr: {
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000715 unsigned TextureOp;
716 switch (IntrinsicID) {
717 case AMDGPUIntrinsic::R600_tex:
718 TextureOp = 0;
719 break;
720 case AMDGPUIntrinsic::R600_texc:
721 TextureOp = 1;
722 break;
723 case AMDGPUIntrinsic::R600_txl:
724 TextureOp = 2;
725 break;
726 case AMDGPUIntrinsic::R600_txlc:
727 TextureOp = 3;
728 break;
729 case AMDGPUIntrinsic::R600_txb:
730 TextureOp = 4;
731 break;
732 case AMDGPUIntrinsic::R600_txbc:
733 TextureOp = 5;
734 break;
735 case AMDGPUIntrinsic::R600_txf:
736 TextureOp = 6;
737 break;
738 case AMDGPUIntrinsic::R600_txq:
739 TextureOp = 7;
740 break;
741 case AMDGPUIntrinsic::R600_ddx:
742 TextureOp = 8;
743 break;
744 case AMDGPUIntrinsic::R600_ddy:
745 TextureOp = 9;
746 break;
Vincent Lejeune6df39432013-10-02 16:00:33 +0000747 case AMDGPUIntrinsic::R600_ldptr:
748 TextureOp = 10;
749 break;
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000750 default:
751 llvm_unreachable("Unknow Texture Operation");
752 }
753
754 SDValue TexArgs[19] = {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000755 DAG.getConstant(TextureOp, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000756 Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000757 DAG.getConstant(0, DL, MVT::i32),
758 DAG.getConstant(1, DL, MVT::i32),
759 DAG.getConstant(2, DL, MVT::i32),
760 DAG.getConstant(3, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000761 Op.getOperand(2),
762 Op.getOperand(3),
763 Op.getOperand(4),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000764 DAG.getConstant(0, DL, MVT::i32),
765 DAG.getConstant(1, DL, MVT::i32),
766 DAG.getConstant(2, DL, MVT::i32),
767 DAG.getConstant(3, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000768 Op.getOperand(5),
769 Op.getOperand(6),
770 Op.getOperand(7),
771 Op.getOperand(8),
772 Op.getOperand(9),
773 Op.getOperand(10)
774 };
Craig Topper48d114b2014-04-26 18:35:24 +0000775 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000776 }
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000777 case AMDGPUIntrinsic::AMDGPU_dp4: {
778 SDValue Args[8] = {
779 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000780 DAG.getConstant(0, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000781 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000782 DAG.getConstant(0, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000783 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000784 DAG.getConstant(1, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000785 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000786 DAG.getConstant(1, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000787 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000788 DAG.getConstant(2, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000789 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000790 DAG.getConstant(2, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000791 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000792 DAG.getConstant(3, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000793 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000794 DAG.getConstant(3, DL, MVT::i32))
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000795 };
Craig Topper48d114b2014-04-26 18:35:24 +0000796 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000797 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000798
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000799 case Intrinsic::r600_read_ngroups_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000800 return LowerImplicitParameter(DAG, VT, DL, 0);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000801 case Intrinsic::r600_read_ngroups_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000802 return LowerImplicitParameter(DAG, VT, DL, 1);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000803 case Intrinsic::r600_read_ngroups_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000804 return LowerImplicitParameter(DAG, VT, DL, 2);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000805 case Intrinsic::r600_read_global_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000806 return LowerImplicitParameter(DAG, VT, DL, 3);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000807 case Intrinsic::r600_read_global_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000808 return LowerImplicitParameter(DAG, VT, DL, 4);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000809 case Intrinsic::r600_read_global_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000810 return LowerImplicitParameter(DAG, VT, DL, 5);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000811 case Intrinsic::r600_read_local_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000812 return LowerImplicitParameter(DAG, VT, DL, 6);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000813 case Intrinsic::r600_read_local_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000814 return LowerImplicitParameter(DAG, VT, DL, 7);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000815 case Intrinsic::r600_read_local_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000816 return LowerImplicitParameter(DAG, VT, DL, 8);
817
Tom Stellarddcb9f092015-07-09 21:20:37 +0000818 case Intrinsic::AMDGPU_read_workdim: {
819 uint32_t ByteOffset = getImplicitParameterOffset(MFI, GRID_DIM);
820 return LowerImplicitParameter(DAG, VT, DL, ByteOffset / 4);
821 }
Jan Veselye5121f32014-10-14 20:05:26 +0000822
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000823 case Intrinsic::r600_read_tgid_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000824 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
825 AMDGPU::T1_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000826 case Intrinsic::r600_read_tgid_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000827 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
828 AMDGPU::T1_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000829 case Intrinsic::r600_read_tgid_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000830 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
831 AMDGPU::T1_Z, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000832 case Intrinsic::r600_read_tidig_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000833 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
834 AMDGPU::T0_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000835 case Intrinsic::r600_read_tidig_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000836 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
837 AMDGPU::T0_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000838 case Intrinsic::r600_read_tidig_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000839 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
840 AMDGPU::T0_Z, VT);
Matt Arsenault257d48d2014-06-24 22:13:39 +0000841 case Intrinsic::AMDGPU_rsq:
842 // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
843 return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
Marek Olsak43650e42015-03-24 13:40:08 +0000844
845 case AMDGPUIntrinsic::AMDGPU_fract:
846 case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
847 return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000848 }
849 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
850 break;
851 }
852 } // end switch(Op.getOpcode())
853 return SDValue();
854}
855
/// Custom-legalize the illegal result values of node \p N, appending the
/// replacement values to \p Results. Opcodes not handled here are forwarded
/// to the common AMDGPU implementation.
void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
    return;
  case ISD::FP_TO_UINT:
    // Only the i1 result needs dedicated handling (it becomes a compare
    // against 0.0); all wider results reuse the signed expansion below.
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
      return;
    }
    // Fall-through. Since we don't care about out of bounds values
    // we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint
    // considers some extra cases which are not necessary here.
  case ISD::FP_TO_SINT: {
    SDValue Result;
    if (expandFP_TO_SINT(N, Result, DAG))
      Results.push_back(Result);
    return;
  }
  case ISD::SDIVREM: {
    // LowerSDIVREM produces a two-result node; push quotient then remainder.
    SDValue Op = SDValue(N, 1);
    SDValue RES = LowerSDIVREM(Op, DAG);
    Results.push_back(RES);
    Results.push_back(RES.getValue(1));
    break;
  }
  case ISD::UDIVREM: {
    // LowerUDIVREM64 appends its replacement values into Results itself.
    SDValue Op = SDValue(N, 0);
    LowerUDIVREM64(Op, DAG, Results);
    break;
  }
  }
}
891
Tom Stellard880a80a2014-06-17 16:53:14 +0000892SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
893 SDValue Vector) const {
894
895 SDLoc DL(Vector);
896 EVT VecVT = Vector.getValueType();
897 EVT EltVT = VecVT.getVectorElementType();
898 SmallVector<SDValue, 8> Args;
899
900 for (unsigned i = 0, e = VecVT.getVectorNumElements();
901 i != e; ++i) {
Mehdi Amini44ede332015-07-09 02:09:04 +0000902 Args.push_back(DAG.getNode(
903 ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
904 DAG.getConstant(i, DL, getVectorIdxTy(DAG.getDataLayout()))));
Tom Stellard880a80a2014-06-17 16:53:14 +0000905 }
906
907 return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
908}
909
910SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
911 SelectionDAG &DAG) const {
912
913 SDLoc DL(Op);
914 SDValue Vector = Op.getOperand(0);
915 SDValue Index = Op.getOperand(1);
916
917 if (isa<ConstantSDNode>(Index) ||
918 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
919 return Op;
920
921 Vector = vectorToVerticalVector(DAG, Vector);
922 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
923 Vector, Index);
924}
925
926SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
927 SelectionDAG &DAG) const {
928 SDLoc DL(Op);
929 SDValue Vector = Op.getOperand(0);
930 SDValue Value = Op.getOperand(1);
931 SDValue Index = Op.getOperand(2);
932
933 if (isa<ConstantSDNode>(Index) ||
934 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
935 return Op;
936
937 Vector = vectorToVerticalVector(DAG, Vector);
938 SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
939 Vector, Value, Index);
940 return vectorToVerticalVector(DAG, Insert);
941}
942
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000943SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
944 // On hw >= R700, COS/SIN input must be between -1. and 1.
945 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
946 EVT VT = Op.getValueType();
947 SDValue Arg = Op.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000948 SDLoc DL(Op);
949 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
950 DAG.getNode(ISD::FADD, DL, VT,
951 DAG.getNode(ISD::FMUL, DL, VT, Arg,
952 DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
953 DAG.getConstantFP(0.5, DL, MVT::f32)));
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000954 unsigned TrigNode;
955 switch (Op.getOpcode()) {
956 case ISD::FCOS:
957 TrigNode = AMDGPUISD::COS_HW;
958 break;
959 case ISD::FSIN:
960 TrigNode = AMDGPUISD::SIN_HW;
961 break;
962 default:
963 llvm_unreachable("Wrong trig opcode");
964 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000965 SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
966 DAG.getNode(ISD::FADD, DL, VT, FractPart,
967 DAG.getConstantFP(-0.5, DL, MVT::f32)));
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000968 if (Gen >= AMDGPUSubtarget::R700)
969 return TrigVal;
970 // On R600 hw, COS/SIN input must be between -Pi and Pi.
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000971 return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
972 DAG.getConstantFP(3.14159265359, DL, MVT::f32));
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000973}
974
Jan Vesely25f36272014-06-18 12:27:13 +0000975SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
976 SDLoc DL(Op);
977 EVT VT = Op.getValueType();
978
979 SDValue Lo = Op.getOperand(0);
980 SDValue Hi = Op.getOperand(1);
981 SDValue Shift = Op.getOperand(2);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000982 SDValue Zero = DAG.getConstant(0, DL, VT);
983 SDValue One = DAG.getConstant(1, DL, VT);
Jan Vesely25f36272014-06-18 12:27:13 +0000984
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000985 SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
986 SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
Jan Vesely25f36272014-06-18 12:27:13 +0000987 SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
988 SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
989
990 // The dance around Width1 is necessary for 0 special case.
991 // Without it the CompShift might be 32, producing incorrect results in
992 // Overflow. So we do the shift in two steps, the alternative is to
993 // add a conditional to filter the special case.
994
995 SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
996 Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);
997
998 SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
999 HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
1000 SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);
1001
1002 SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
1003 SDValue LoBig = Zero;
1004
1005 Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
1006 Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
1007
1008 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
1009}
1010
Jan Vesely900ff2e2014-06-18 12:27:15 +00001011SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
1012 SDLoc DL(Op);
1013 EVT VT = Op.getValueType();
1014
1015 SDValue Lo = Op.getOperand(0);
1016 SDValue Hi = Op.getOperand(1);
1017 SDValue Shift = Op.getOperand(2);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001018 SDValue Zero = DAG.getConstant(0, DL, VT);
1019 SDValue One = DAG.getConstant(1, DL, VT);
Jan Vesely900ff2e2014-06-18 12:27:15 +00001020
Jan Veselyecf51332014-06-18 12:27:17 +00001021 const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;
1022
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001023 SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
1024 SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
Jan Vesely900ff2e2014-06-18 12:27:15 +00001025 SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
1026 SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
1027
1028 // The dance around Width1 is necessary for 0 special case.
1029 // Without it the CompShift might be 32, producing incorrect results in
1030 // Overflow. So we do the shift in two steps, the alternative is to
1031 // add a conditional to filter the special case.
1032
1033 SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
1034 Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);
1035
Jan Veselyecf51332014-06-18 12:27:17 +00001036 SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
Jan Vesely900ff2e2014-06-18 12:27:15 +00001037 SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
1038 LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);
1039
Jan Veselyecf51332014-06-18 12:27:17 +00001040 SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
1041 SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;
Jan Vesely900ff2e2014-06-18 12:27:15 +00001042
1043 Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
1044 Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
1045
1046 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
1047}
1048
Jan Vesely808fff52015-04-30 17:15:56 +00001049SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
1050 unsigned mainop, unsigned ovf) const {
1051 SDLoc DL(Op);
1052 EVT VT = Op.getValueType();
1053
1054 SDValue Lo = Op.getOperand(0);
1055 SDValue Hi = Op.getOperand(1);
1056
1057 SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
1058 // Extend sign.
1059 OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
1060 DAG.getValueType(MVT::i1));
1061
1062 SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);
1063
1064 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
1065}
1066
Tom Stellard75aadc22012-12-11 21:25:42 +00001067SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001068 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001069 return DAG.getNode(
1070 ISD::SETCC,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001071 DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001072 MVT::i1,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001073 Op, DAG.getConstantFP(0.0f, DL, MVT::f32),
Tom Stellard75aadc22012-12-11 21:25:42 +00001074 DAG.getCondCode(ISD::SETNE)
1075 );
1076}
1077
Tom Stellard75aadc22012-12-11 21:25:42 +00001078SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001079 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001080 unsigned DwordOffset) const {
1081 unsigned ByteOffset = DwordOffset * 4;
1082 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +00001083 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +00001084
1085 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
1086 assert(isInt<16>(ByteOffset));
1087
1088 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001089 DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
Tom Stellard75aadc22012-12-11 21:25:42 +00001090 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
1091 false, false, false, 0);
1092}
1093
Tom Stellard75aadc22012-12-11 21:25:42 +00001094bool R600TargetLowering::isZero(SDValue Op) const {
1095 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
1096 return Cst->isNullValue();
1097 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
1098 return CstFP->isZero();
1099 } else {
1100 return false;
1101 }
1102}
1103
/// Custom-lower SELECT_CC. Tries, in order: a legacy min/max combine for
/// f32, the SET* instruction forms, the CND* instruction forms (compare
/// against zero), and finally a two-step expansion into supported
/// SELECT_CC nodes.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // For f32 selects, first see if this is really a min/max pattern.
  if (VT == MVT::f32) {
    DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
    SDValue MinMax = CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
    if (MinMax)
      return MinMax;
  }

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
     ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  // If the operands are in hw-false/hw-true order, invert the condition so
  // they land in the hw-true/hw-false slots a SET* instruction expects.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      // The inverse isn't legal directly; try it with swapped operands.
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        // Inverting the condition also requires exchanging the select values.
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types.  This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    // Not-equal conditions are handled by inverting the condition and
    // exchanging the select values.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, DL, CompareVT);
    HWFalse = DAG.getConstant(0, DL, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
1245
Alp Tokercb402912014-01-24 17:20:08 +00001246/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001247/// convert these pointers to a register index. Each register holds
1248/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
1249/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
1250/// for indirect addressing.
1251SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1252 unsigned StackWidth,
1253 SelectionDAG &DAG) const {
1254 unsigned SRLPad;
1255 switch(StackWidth) {
1256 case 1:
1257 SRLPad = 2;
1258 break;
1259 case 2:
1260 SRLPad = 3;
1261 break;
1262 case 4:
1263 SRLPad = 4;
1264 break;
1265 default: llvm_unreachable("Invalid stack width");
1266 }
1267
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001268 SDLoc DL(Ptr);
1269 return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
1270 DAG.getConstant(SRLPad, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001271}
1272
1273void R600TargetLowering::getStackAddress(unsigned StackWidth,
1274 unsigned ElemIdx,
1275 unsigned &Channel,
1276 unsigned &PtrIncr) const {
1277 switch (StackWidth) {
1278 default:
1279 case 1:
1280 Channel = 0;
1281 if (ElemIdx > 0) {
1282 PtrIncr = 1;
1283 } else {
1284 PtrIncr = 0;
1285 }
1286 break;
1287 case 2:
1288 Channel = ElemIdx % 2;
1289 if (ElemIdx == 2) {
1290 PtrIncr = 1;
1291 } else {
1292 PtrIncr = 0;
1293 }
1294 break;
1295 case 4:
1296 Channel = ElemIdx;
1297 PtrIncr = 0;
1298 break;
1299 }
1300}
1301
Tom Stellard75aadc22012-12-11 21:25:42 +00001302SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001303 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001304 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1305 SDValue Chain = Op.getOperand(0);
1306 SDValue Value = Op.getOperand(1);
1307 SDValue Ptr = Op.getOperand(2);
1308
Tom Stellard2ffc3302013-08-26 15:05:44 +00001309 SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
Tom Stellardfbab8272013-08-16 01:12:11 +00001310 if (Result.getNode()) {
1311 return Result;
1312 }
1313
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001314 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
1315 if (StoreNode->isTruncatingStore()) {
1316 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001317 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001318 EVT MemVT = StoreNode->getMemoryVT();
1319 SDValue MaskConstant;
1320 if (MemVT == MVT::i8) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001321 MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001322 } else {
1323 assert(MemVT == MVT::i16);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001324 MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001325 }
1326 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001327 DAG.getConstant(2, DL, MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001328 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001329 DAG.getConstant(0x00000003, DL, VT));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001330 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1331 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001332 DAG.getConstant(3, DL, VT));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001333 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1334 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
1335 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1336 // vector instead.
1337 SDValue Src[4] = {
1338 ShiftedValue,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001339 DAG.getConstant(0, DL, MVT::i32),
1340 DAG.getConstant(0, DL, MVT::i32),
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001341 Mask
1342 };
Craig Topper48d114b2014-04-26 18:35:24 +00001343 SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001344 SDValue Args[3] = { Chain, Input, DWordAddr };
1345 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
Craig Topper206fcd42014-04-26 19:29:41 +00001346 Op->getVTList(), Args, MemVT,
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001347 StoreNode->getMemOperand());
1348 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
1349 Value.getValueType().bitsGE(MVT::i32)) {
1350 // Convert pointer from byte address to dword address.
1351 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1352 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001353 Ptr, DAG.getConstant(2, DL, MVT::i32)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001354
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001355 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
Matt Arsenaulteaa3a7e2013-12-10 21:37:42 +00001356 llvm_unreachable("Truncated and indexed stores not supported yet");
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001357 } else {
1358 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1359 }
1360 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001361 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001362 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001363
1364 EVT ValueVT = Value.getValueType();
1365
1366 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1367 return SDValue();
1368 }
1369
Tom Stellarde9373602014-01-22 19:24:14 +00001370 SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
1371 if (Ret.getNode()) {
1372 return Ret;
1373 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001374 // Lowering for indirect addressing
1375
1376 const MachineFunction &MF = DAG.getMachineFunction();
Eric Christopher7792e322015-01-30 23:24:40 +00001377 const AMDGPUFrameLowering *TFL =
1378 static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001379 unsigned StackWidth = TFL->getStackWidth(MF);
1380
1381 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1382
1383 if (ValueVT.isVector()) {
1384 unsigned NumElemVT = ValueVT.getVectorNumElements();
1385 EVT ElemVT = ValueVT.getVectorElementType();
Craig Topper48d114b2014-04-26 18:35:24 +00001386 SmallVector<SDValue, 4> Stores(NumElemVT);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001387
1388 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1389 "vector width in load");
1390
1391 for (unsigned i = 0; i < NumElemVT; ++i) {
1392 unsigned Channel, PtrIncr;
1393 getStackAddress(StackWidth, i, Channel, PtrIncr);
1394 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001395 DAG.getConstant(PtrIncr, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001396 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001397 Value, DAG.getConstant(i, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001398
1399 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1400 Chain, Elem, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001401 DAG.getTargetConstant(Channel, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001402 }
Craig Topper48d114b2014-04-26 18:35:24 +00001403 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001404 } else {
1405 if (ValueVT == MVT::i8) {
1406 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1407 }
1408 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001409 DAG.getTargetConstant(0, DL, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001410 }
1411
1412 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001413}
1414
Tom Stellard365366f2013-01-23 02:09:06 +00001415// return (512 + (kc_bank << 12)
1416static int
1417ConstantAddressBlock(unsigned AddressSpace) {
1418 switch (AddressSpace) {
1419 case AMDGPUAS::CONSTANT_BUFFER_0:
1420 return 512;
1421 case AMDGPUAS::CONSTANT_BUFFER_1:
1422 return 512 + 4096;
1423 case AMDGPUAS::CONSTANT_BUFFER_2:
1424 return 512 + 4096 * 2;
1425 case AMDGPUAS::CONSTANT_BUFFER_3:
1426 return 512 + 4096 * 3;
1427 case AMDGPUAS::CONSTANT_BUFFER_4:
1428 return 512 + 4096 * 4;
1429 case AMDGPUAS::CONSTANT_BUFFER_5:
1430 return 512 + 4096 * 5;
1431 case AMDGPUAS::CONSTANT_BUFFER_6:
1432 return 512 + 4096 * 6;
1433 case AMDGPUAS::CONSTANT_BUFFER_7:
1434 return 512 + 4096 * 7;
1435 case AMDGPUAS::CONSTANT_BUFFER_8:
1436 return 512 + 4096 * 8;
1437 case AMDGPUAS::CONSTANT_BUFFER_9:
1438 return 512 + 4096 * 9;
1439 case AMDGPUAS::CONSTANT_BUFFER_10:
1440 return 512 + 4096 * 10;
1441 case AMDGPUAS::CONSTANT_BUFFER_11:
1442 return 512 + 4096 * 11;
1443 case AMDGPUAS::CONSTANT_BUFFER_12:
1444 return 512 + 4096 * 12;
1445 case AMDGPUAS::CONSTANT_BUFFER_13:
1446 return 512 + 4096 * 13;
1447 case AMDGPUAS::CONSTANT_BUFFER_14:
1448 return 512 + 4096 * 14;
1449 case AMDGPUAS::CONSTANT_BUFFER_15:
1450 return 512 + 4096 * 15;
1451 default:
1452 return -1;
1453 }
1454}
1455
1456SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
1457{
1458 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001459 SDLoc DL(Op);
Tom Stellard365366f2013-01-23 02:09:06 +00001460 LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
1461 SDValue Chain = Op.getOperand(0);
1462 SDValue Ptr = Op.getOperand(1);
1463 SDValue LoweredLoad;
1464
Matt Arsenault8b03e6c2015-07-09 18:47:03 +00001465 if (SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG))
1466 return Ret;
Tom Stellarde9373602014-01-22 19:24:14 +00001467
Tom Stellard067c8152014-07-21 14:01:14 +00001468 // Lower loads constant address space global variable loads
1469 if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
Mehdi Aminia28d91d2015-03-10 02:37:25 +00001470 isa<GlobalVariable>(GetUnderlyingObject(
Mehdi Amini44ede332015-07-09 02:09:04 +00001471 LoadNode->getMemOperand()->getValue(), DAG.getDataLayout()))) {
Tom Stellard067c8152014-07-21 14:01:14 +00001472
Mehdi Amini44ede332015-07-09 02:09:04 +00001473 SDValue Ptr = DAG.getZExtOrTrunc(
1474 LoadNode->getBasePtr(), DL,
1475 getPointerTy(DAG.getDataLayout(), AMDGPUAS::PRIVATE_ADDRESS));
Tom Stellard067c8152014-07-21 14:01:14 +00001476 Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001477 DAG.getConstant(2, DL, MVT::i32));
Tom Stellard067c8152014-07-21 14:01:14 +00001478 return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op->getVTList(),
1479 LoadNode->getChain(), Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001480 DAG.getTargetConstant(0, DL, MVT::i32),
1481 Op.getOperand(2));
Tom Stellard067c8152014-07-21 14:01:14 +00001482 }
Tom Stellarde9373602014-01-22 19:24:14 +00001483
Tom Stellard35bb18c2013-08-26 15:06:04 +00001484 if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
1485 SDValue MergedValues[2] = {
Matt Arsenault83e60582014-07-24 17:10:35 +00001486 ScalarizeVectorLoad(Op, DAG),
Tom Stellard35bb18c2013-08-26 15:06:04 +00001487 Chain
1488 };
Craig Topper64941d92014-04-27 19:20:57 +00001489 return DAG.getMergeValues(MergedValues, DL);
Tom Stellard35bb18c2013-08-26 15:06:04 +00001490 }
1491
Tom Stellard365366f2013-01-23 02:09:06 +00001492 int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
Matt Arsenault00a0d6f2013-11-13 02:39:07 +00001493 if (ConstantBlock > -1 &&
1494 ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
1495 (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
Tom Stellard365366f2013-01-23 02:09:06 +00001496 SDValue Result;
Nick Lewyckyaad475b2014-04-15 07:22:52 +00001497 if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
1498 isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
Matt Arsenaultef1a9502013-11-01 17:39:26 +00001499 isa<ConstantSDNode>(Ptr)) {
Tom Stellard365366f2013-01-23 02:09:06 +00001500 SDValue Slots[4];
1501 for (unsigned i = 0; i < 4; i++) {
1502 // We want Const position encoded with the following formula :
1503 // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
1504 // const_index is Ptr computed by llvm using an alignment of 16.
1505 // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
1506 // then div by 4 at the ISel step
1507 SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001508 DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
Tom Stellard365366f2013-01-23 02:09:06 +00001509 Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
1510 }
Tom Stellard0344cdf2013-08-01 15:23:42 +00001511 EVT NewVT = MVT::v4i32;
1512 unsigned NumElements = 4;
1513 if (VT.isVector()) {
1514 NewVT = VT;
1515 NumElements = VT.getVectorNumElements();
1516 }
Craig Topper48d114b2014-04-26 18:35:24 +00001517 Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT,
Craig Topper2d2aa0c2014-04-30 07:17:30 +00001518 makeArrayRef(Slots, NumElements));
Tom Stellard365366f2013-01-23 02:09:06 +00001519 } else {
Alp Tokerf907b892013-12-05 05:44:44 +00001520 // non-constant ptr can't be folded, keeps it as a v4f32 load
Tom Stellard365366f2013-01-23 02:09:06 +00001521 Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001522 DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
1523 DAG.getConstant(4, DL, MVT::i32)),
1524 DAG.getConstant(LoadNode->getAddressSpace() -
1525 AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
Tom Stellard365366f2013-01-23 02:09:06 +00001526 );
1527 }
1528
1529 if (!VT.isVector()) {
1530 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001531 DAG.getConstant(0, DL, MVT::i32));
Tom Stellard365366f2013-01-23 02:09:06 +00001532 }
1533
1534 SDValue MergedValues[2] = {
Matt Arsenault7939acd2014-04-07 16:44:24 +00001535 Result,
1536 Chain
Tom Stellard365366f2013-01-23 02:09:06 +00001537 };
Craig Topper64941d92014-04-27 19:20:57 +00001538 return DAG.getMergeValues(MergedValues, DL);
Tom Stellard365366f2013-01-23 02:09:06 +00001539 }
1540
Matt Arsenault909d0c02013-10-30 23:43:29 +00001541 // For most operations returning SDValue() will result in the node being
1542 // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
1543 // need to manually expand loads that may be legal in some address spaces and
1544 // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
1545 // compute shaders, since the data is sign extended when it is uploaded to the
1546 // buffer. However SEXT loads from other address spaces are not supported, so
1547 // we need to expand them here.
Tom Stellard84021442013-07-23 01:48:24 +00001548 if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
1549 EVT MemVT = LoadNode->getMemoryVT();
1550 assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
Tom Stellard84021442013-07-23 01:48:24 +00001551 SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
1552 LoadNode->getPointerInfo(), MemVT,
1553 LoadNode->isVolatile(),
1554 LoadNode->isNonTemporal(),
Louis Gerbarg67474e32014-07-31 21:45:05 +00001555 LoadNode->isInvariant(),
Tom Stellard84021442013-07-23 01:48:24 +00001556 LoadNode->getAlignment());
Jan Veselyb670d372015-05-26 18:07:22 +00001557 SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
1558 DAG.getValueType(MemVT));
Tom Stellard84021442013-07-23 01:48:24 +00001559
Jan Veselyb670d372015-05-26 18:07:22 +00001560 SDValue MergedValues[2] = { Res, Chain };
Craig Topper64941d92014-04-27 19:20:57 +00001561 return DAG.getMergeValues(MergedValues, DL);
Tom Stellard84021442013-07-23 01:48:24 +00001562 }
1563
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001564 if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1565 return SDValue();
1566 }
1567
1568 // Lowering for indirect addressing
1569 const MachineFunction &MF = DAG.getMachineFunction();
Eric Christopher7792e322015-01-30 23:24:40 +00001570 const AMDGPUFrameLowering *TFL =
1571 static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001572 unsigned StackWidth = TFL->getStackWidth(MF);
1573
1574 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1575
1576 if (VT.isVector()) {
1577 unsigned NumElemVT = VT.getVectorNumElements();
1578 EVT ElemVT = VT.getVectorElementType();
1579 SDValue Loads[4];
1580
1581 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1582 "vector width in load");
1583
1584 for (unsigned i = 0; i < NumElemVT; ++i) {
1585 unsigned Channel, PtrIncr;
1586 getStackAddress(StackWidth, i, Channel, PtrIncr);
1587 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001588 DAG.getConstant(PtrIncr, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001589 Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
1590 Chain, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001591 DAG.getTargetConstant(Channel, DL, MVT::i32),
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001592 Op.getOperand(2));
1593 }
1594 for (unsigned i = NumElemVT; i < 4; ++i) {
1595 Loads[i] = DAG.getUNDEF(ElemVT);
1596 }
1597 EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
Craig Topper48d114b2014-04-26 18:35:24 +00001598 LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001599 } else {
1600 LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
1601 Chain, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001602 DAG.getTargetConstant(0, DL, MVT::i32), // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001603 Op.getOperand(2));
1604 }
1605
Matt Arsenault7939acd2014-04-07 16:44:24 +00001606 SDValue Ops[2] = {
1607 LoweredLoad,
1608 Chain
1609 };
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001610
Craig Topper64941d92014-04-27 19:20:57 +00001611 return DAG.getMergeValues(Ops, DL);
Tom Stellard365366f2013-01-23 02:09:06 +00001612}
Tom Stellard75aadc22012-12-11 21:25:42 +00001613
Matt Arsenault1d555c42014-06-23 18:00:55 +00001614SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1615 SDValue Chain = Op.getOperand(0);
1616 SDValue Cond = Op.getOperand(1);
1617 SDValue Jump = Op.getOperand(2);
1618
1619 return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1620 Chain, Jump, Cond);
1621}
1622
Tom Stellard75aadc22012-12-11 21:25:42 +00001623/// XXX Only kernel functions are supported, so we can assume for now that
1624/// every function is a kernel function, but in the future we should use
1625/// separate calling conventions for kernel and non-kernel functions.
1626SDValue R600TargetLowering::LowerFormalArguments(
1627 SDValue Chain,
1628 CallingConv::ID CallConv,
1629 bool isVarArg,
1630 const SmallVectorImpl<ISD::InputArg> &Ins,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001631 SDLoc DL, SelectionDAG &DAG,
Tom Stellard75aadc22012-12-11 21:25:42 +00001632 SmallVectorImpl<SDValue> &InVals) const {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001633 SmallVector<CCValAssign, 16> ArgLocs;
Eric Christopherb5217502014-08-06 18:45:26 +00001634 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1635 *DAG.getContext());
Vincent Lejeunef143af32013-11-11 22:10:24 +00001636 MachineFunction &MF = DAG.getMachineFunction();
Jan Veselye5121f32014-10-14 20:05:26 +00001637 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellardacfeebf2013-07-23 01:48:05 +00001638
Tom Stellardaf775432013-10-23 00:44:32 +00001639 SmallVector<ISD::InputArg, 8> LocalIns;
1640
Matt Arsenault209a7b92014-04-18 07:40:20 +00001641 getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);
Tom Stellardaf775432013-10-23 00:44:32 +00001642
1643 AnalyzeFormalArguments(CCInfo, LocalIns);
Tom Stellardacfeebf2013-07-23 01:48:05 +00001644
Tom Stellard1e803092013-07-23 01:48:18 +00001645 for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001646 CCValAssign &VA = ArgLocs[i];
Matt Arsenault74ef2772014-08-13 18:14:11 +00001647 const ISD::InputArg &In = Ins[i];
1648 EVT VT = In.VT;
1649 EVT MemVT = VA.getLocVT();
1650 if (!VT.isVector() && MemVT.isVector()) {
1651 // Get load source type if scalarized.
1652 MemVT = MemVT.getVectorElementType();
1653 }
Tom Stellard78e01292013-07-23 01:47:58 +00001654
Jan Veselye5121f32014-10-14 20:05:26 +00001655 if (MFI->getShaderType() != ShaderType::COMPUTE) {
Vincent Lejeunef143af32013-11-11 22:10:24 +00001656 unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
1657 SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
1658 InVals.push_back(Register);
1659 continue;
1660 }
1661
Tom Stellard75aadc22012-12-11 21:25:42 +00001662 PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Matt Arsenault74ef2772014-08-13 18:14:11 +00001663 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellardacfeebf2013-07-23 01:48:05 +00001664
Matt Arsenaultfae02982014-03-17 18:58:11 +00001665 // i64 isn't a legal type, so the register type used ends up as i32, which
1666 // isn't expected here. It attempts to create this sextload, but it ends up
1667 // being invalid. Somehow this seems to work with i64 arguments, but breaks
1668 // for <1 x i64>.
1669
Tom Stellardacfeebf2013-07-23 01:48:05 +00001670 // The first 36 bytes of the input buffer contains information about
1671 // thread group and global sizes.
Matt Arsenault74ef2772014-08-13 18:14:11 +00001672 ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
1673 if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
1674 // FIXME: This should really check the extload type, but the handling of
1675 // extload vector parameters seems to be broken.
Matt Arsenaulte1f030c2014-04-11 20:59:54 +00001676
Matt Arsenault74ef2772014-08-13 18:14:11 +00001677 // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
1678 Ext = ISD::SEXTLOAD;
1679 }
1680
1681 // Compute the offset from the value.
1682 // XXX - I think PartOffset should give you this, but it seems to give the
1683 // size of the register which isn't useful.
1684
Andrew Trick05938a52015-02-16 18:10:47 +00001685 unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
Matt Arsenault74ef2772014-08-13 18:14:11 +00001686 unsigned PartOffset = VA.getLocMemOffset();
Jan Veselye5121f32014-10-14 20:05:26 +00001687 unsigned Offset = 36 + VA.getLocMemOffset();
Matt Arsenault74ef2772014-08-13 18:14:11 +00001688
1689 MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
1690 SDValue Arg = DAG.getLoad(ISD::UNINDEXED, Ext, VT, DL, Chain,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001691 DAG.getConstant(Offset, DL, MVT::i32),
Matt Arsenault74ef2772014-08-13 18:14:11 +00001692 DAG.getUNDEF(MVT::i32),
1693 PtrInfo,
1694 MemVT, false, true, true, 4);
Matt Arsenault209a7b92014-04-18 07:40:20 +00001695
1696 // 4 is the preferred alignment for the CONSTANT memory space.
Tom Stellard75aadc22012-12-11 21:25:42 +00001697 InVals.push_back(Arg);
Jan Veselye5121f32014-10-14 20:05:26 +00001698 MFI->ABIArgOffset = Offset + MemVT.getStoreSize();
Tom Stellard75aadc22012-12-11 21:25:42 +00001699 }
1700 return Chain;
1701}
1702
Mehdi Amini44ede332015-07-09 02:09:04 +00001703EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1704 EVT VT) const {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001705 if (!VT.isVector())
1706 return MVT::i32;
Tom Stellard75aadc22012-12-11 21:25:42 +00001707 return VT.changeVectorElementTypeToInteger();
1708}
1709
Matt Arsenault209a7b92014-04-18 07:40:20 +00001710static SDValue CompactSwizzlableVector(
1711 SelectionDAG &DAG, SDValue VectorEntry,
1712 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001713 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1714 assert(RemapSwizzle.empty());
1715 SDValue NewBldVec[4] = {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001716 VectorEntry.getOperand(0),
1717 VectorEntry.getOperand(1),
1718 VectorEntry.getOperand(2),
1719 VectorEntry.getOperand(3)
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001720 };
1721
1722 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeunefa58a5f2013-10-13 17:56:10 +00001723 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1724 // We mask write here to teach later passes that the ith element of this
1725 // vector is undef. Thus we can use it to reduce 128 bits reg usage,
1726 // break false dependencies and additionnaly make assembly easier to read.
1727 RemapSwizzle[i] = 7; // SEL_MASK_WRITE
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001728 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1729 if (C->isZero()) {
1730 RemapSwizzle[i] = 4; // SEL_0
1731 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1732 } else if (C->isExactlyValue(1.0)) {
1733 RemapSwizzle[i] = 5; // SEL_1
1734 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1735 }
1736 }
1737
1738 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1739 continue;
1740 for (unsigned j = 0; j < i; j++) {
1741 if (NewBldVec[i] == NewBldVec[j]) {
1742 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1743 RemapSwizzle[i] = j;
1744 break;
1745 }
1746 }
1747 }
1748
1749 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001750 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001751}
1752
Benjamin Kramer193960c2013-06-11 13:32:25 +00001753static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1754 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001755 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1756 assert(RemapSwizzle.empty());
1757 SDValue NewBldVec[4] = {
1758 VectorEntry.getOperand(0),
1759 VectorEntry.getOperand(1),
1760 VectorEntry.getOperand(2),
1761 VectorEntry.getOperand(3)
1762 };
1763 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001764 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001765 RemapSwizzle[i] = i;
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001766 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1767 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1768 ->getZExtValue();
1769 if (i == Idx)
1770 isUnmovable[Idx] = true;
1771 }
1772 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001773
1774 for (unsigned i = 0; i < 4; i++) {
1775 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1776 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1777 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001778 if (isUnmovable[Idx])
1779 continue;
1780 // Swap i and Idx
1781 std::swap(NewBldVec[Idx], NewBldVec[i]);
1782 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1783 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001784 }
1785 }
1786
1787 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001788 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001789}
1790
1791
1792SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001793 SDValue Swz[4], SelectionDAG &DAG,
1794 SDLoc DL) const {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001795 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1796 // Old -> New swizzle values
1797 DenseMap<unsigned, unsigned> SwizzleRemap;
1798
1799 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1800 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001801 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001802 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001803 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001804 }
1805
1806 SwizzleRemap.clear();
1807 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1808 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001809 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001810 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001811 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001812 }
1813
1814 return BuildVector;
1815}
1816
1817
Tom Stellard75aadc22012-12-11 21:25:42 +00001818//===----------------------------------------------------------------------===//
1819// Custom DAG Optimizations
1820//===----------------------------------------------------------------------===//
1821
1822SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1823 DAGCombinerInfo &DCI) const {
1824 SelectionDAG &DAG = DCI.DAG;
1825
1826 switch (N->getOpcode()) {
Tom Stellard50122a52014-04-07 19:45:41 +00001827 default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00001828 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1829 case ISD::FP_ROUND: {
1830 SDValue Arg = N->getOperand(0);
1831 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001832 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001833 Arg.getOperand(0));
1834 }
1835 break;
1836 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001837
1838 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1839 // (i32 select_cc f32, f32, -1, 0 cc)
1840 //
1841 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1842 // this to one of the SET*_DX10 instructions.
1843 case ISD::FP_TO_SINT: {
1844 SDValue FNeg = N->getOperand(0);
1845 if (FNeg.getOpcode() != ISD::FNEG) {
1846 return SDValue();
1847 }
1848 SDValue SelectCC = FNeg.getOperand(0);
1849 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1850 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1851 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1852 !isHWTrueValue(SelectCC.getOperand(2)) ||
1853 !isHWFalseValue(SelectCC.getOperand(3))) {
1854 return SDValue();
1855 }
1856
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001857 SDLoc dl(N);
1858 return DAG.getNode(ISD::SELECT_CC, dl, N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001859 SelectCC.getOperand(0), // LHS
1860 SelectCC.getOperand(1), // RHS
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001861 DAG.getConstant(-1, dl, MVT::i32), // True
1862 DAG.getConstant(0, dl, MVT::i32), // False
Tom Stellarde06163a2013-02-07 14:02:35 +00001863 SelectCC.getOperand(4)); // CC
1864
1865 break;
1866 }
Quentin Colombete2e05482013-07-30 00:27:16 +00001867
NAKAMURA Takumi8a046432013-10-28 04:07:38 +00001868 // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1869 // => build_vector elt0, ... , NewEltIdx, ... , eltN
Quentin Colombete2e05482013-07-30 00:27:16 +00001870 case ISD::INSERT_VECTOR_ELT: {
1871 SDValue InVec = N->getOperand(0);
1872 SDValue InVal = N->getOperand(1);
1873 SDValue EltNo = N->getOperand(2);
1874 SDLoc dl(N);
1875
1876 // If the inserted element is an UNDEF, just use the input vector.
1877 if (InVal.getOpcode() == ISD::UNDEF)
1878 return InVec;
1879
1880 EVT VT = InVec.getValueType();
1881
1882 // If we can't generate a legal BUILD_VECTOR, exit
1883 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1884 return SDValue();
1885
1886 // Check that we know which element is being inserted
1887 if (!isa<ConstantSDNode>(EltNo))
1888 return SDValue();
1889 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1890
1891 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1892 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1893 // vector elements.
1894 SmallVector<SDValue, 8> Ops;
1895 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1896 Ops.append(InVec.getNode()->op_begin(),
1897 InVec.getNode()->op_end());
1898 } else if (InVec.getOpcode() == ISD::UNDEF) {
1899 unsigned NElts = VT.getVectorNumElements();
1900 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1901 } else {
1902 return SDValue();
1903 }
1904
1905 // Insert the element
1906 if (Elt < Ops.size()) {
1907 // All the operands of BUILD_VECTOR must have the same type;
1908 // we enforce that here.
1909 EVT OpVT = Ops[0].getValueType();
1910 if (InVal.getValueType() != OpVT)
1911 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1912 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
1913 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
1914 Ops[Elt] = InVal;
1915 }
1916
1917 // Return the new vector
Craig Topper48d114b2014-04-26 18:35:24 +00001918 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
Quentin Colombete2e05482013-07-30 00:27:16 +00001919 }
1920
Tom Stellard365366f2013-01-23 02:09:06 +00001921 // Extract_vec (Build_vector) generated by custom lowering
1922 // also needs to be customly combined
1923 case ISD::EXTRACT_VECTOR_ELT: {
1924 SDValue Arg = N->getOperand(0);
1925 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1926 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1927 unsigned Element = Const->getZExtValue();
1928 return Arg->getOperand(Element);
1929 }
1930 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001931 if (Arg.getOpcode() == ISD::BITCAST &&
1932 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1933 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1934 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001935 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00001936 Arg->getOperand(0).getOperand(Element));
1937 }
1938 }
Mehdi Aminie029eae2015-07-16 06:23:12 +00001939 break;
Tom Stellard365366f2013-01-23 02:09:06 +00001940 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001941
1942 case ISD::SELECT_CC: {
Tom Stellardafa8b532014-05-09 16:42:16 +00001943 // Try common optimizations
1944 SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
1945 if (Ret.getNode())
1946 return Ret;
1947
Tom Stellarde06163a2013-02-07 14:02:35 +00001948 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1949 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001950 //
1951 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1952 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001953 SDValue LHS = N->getOperand(0);
1954 if (LHS.getOpcode() != ISD::SELECT_CC) {
1955 return SDValue();
1956 }
1957
1958 SDValue RHS = N->getOperand(1);
1959 SDValue True = N->getOperand(2);
1960 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001961 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001962
1963 if (LHS.getOperand(2).getNode() != True.getNode() ||
1964 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00001965 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00001966 return SDValue();
1967 }
1968
Tom Stellard5e524892013-03-08 15:37:11 +00001969 switch (NCC) {
1970 default: return SDValue();
1971 case ISD::SETNE: return LHS;
1972 case ISD::SETEQ: {
1973 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1974 LHSCC = ISD::getSetCCInverse(LHSCC,
1975 LHS.getOperand(0).getValueType().isInteger());
Tom Stellardcd428182013-09-28 02:50:38 +00001976 if (DCI.isBeforeLegalizeOps() ||
1977 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1978 return DAG.getSelectCC(SDLoc(N),
1979 LHS.getOperand(0),
1980 LHS.getOperand(1),
1981 LHS.getOperand(2),
1982 LHS.getOperand(3),
1983 LHSCC);
1984 break;
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001985 }
Tom Stellard5e524892013-03-08 15:37:11 +00001986 }
Tom Stellardcd428182013-09-28 02:50:38 +00001987 return SDValue();
Tom Stellard5e524892013-03-08 15:37:11 +00001988 }
Tom Stellardfbab8272013-08-16 01:12:11 +00001989
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001990 case AMDGPUISD::EXPORT: {
1991 SDValue Arg = N->getOperand(1);
1992 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1993 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001994
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001995 SDValue NewArgs[8] = {
1996 N->getOperand(0), // Chain
1997 SDValue(),
1998 N->getOperand(2), // ArrayBase
1999 N->getOperand(3), // Type
2000 N->getOperand(4), // SWZ_X
2001 N->getOperand(5), // SWZ_Y
2002 N->getOperand(6), // SWZ_Z
2003 N->getOperand(7) // SWZ_W
2004 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00002005 SDLoc DL(N);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002006 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
Craig Topper48d114b2014-04-26 18:35:24 +00002007 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
Tom Stellarde06163a2013-02-07 14:02:35 +00002008 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002009 case AMDGPUISD::TEXTURE_FETCH: {
2010 SDValue Arg = N->getOperand(1);
2011 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2012 break;
2013
2014 SDValue NewArgs[19] = {
2015 N->getOperand(0),
2016 N->getOperand(1),
2017 N->getOperand(2),
2018 N->getOperand(3),
2019 N->getOperand(4),
2020 N->getOperand(5),
2021 N->getOperand(6),
2022 N->getOperand(7),
2023 N->getOperand(8),
2024 N->getOperand(9),
2025 N->getOperand(10),
2026 N->getOperand(11),
2027 N->getOperand(12),
2028 N->getOperand(13),
2029 N->getOperand(14),
2030 N->getOperand(15),
2031 N->getOperand(16),
2032 N->getOperand(17),
2033 N->getOperand(18),
2034 };
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002035 SDLoc DL(N);
2036 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
2037 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002038 }
Tom Stellard75aadc22012-12-11 21:25:42 +00002039 }
Matt Arsenault5565f65e2014-05-22 18:09:07 +00002040
2041 return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00002042}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002043
2044static bool
2045FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002046 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Eric Christopherfc6de422014-08-05 02:39:49 +00002047 const R600InstrInfo *TII =
2048 static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002049 if (!Src.isMachineOpcode())
2050 return false;
2051 switch (Src.getMachineOpcode()) {
2052 case AMDGPU::FNEG_R600:
2053 if (!Neg.getNode())
2054 return false;
2055 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002056 Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002057 return true;
2058 case AMDGPU::FABS_R600:
2059 if (!Abs.getNode())
2060 return false;
2061 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002062 Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002063 return true;
2064 case AMDGPU::CONST_COPY: {
2065 unsigned Opcode = ParentNode->getMachineOpcode();
2066 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2067
2068 if (!Sel.getNode())
2069 return false;
2070
2071 SDValue CstOffset = Src.getOperand(0);
2072 if (ParentNode->getValueType(0).isVector())
2073 return false;
2074
2075 // Gather constants values
2076 int SrcIndices[] = {
2077 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2078 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2079 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
2080 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2081 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2082 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2083 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2084 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2085 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2086 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2087 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
2088 };
2089 std::vector<unsigned> Consts;
Matt Arsenault4d64f962014-05-12 19:23:21 +00002090 for (int OtherSrcIdx : SrcIndices) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002091 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2092 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2093 continue;
2094 if (HasDst) {
2095 OtherSrcIdx--;
2096 OtherSelIdx--;
2097 }
2098 if (RegisterSDNode *Reg =
2099 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2100 if (Reg->getReg() == AMDGPU::ALU_CONST) {
Matt Arsenaultb3ee3882014-05-12 19:26:38 +00002101 ConstantSDNode *Cst
2102 = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002103 Consts.push_back(Cst->getZExtValue());
2104 }
2105 }
2106 }
2107
Matt Arsenault37c12d72014-05-12 20:42:57 +00002108 ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002109 Consts.push_back(Cst->getZExtValue());
2110 if (!TII->fitsConstReadLimitations(Consts)) {
2111 return false;
2112 }
2113
2114 Sel = CstOffset;
2115 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
2116 return true;
2117 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002118 case AMDGPU::MOV_IMM_I32:
2119 case AMDGPU::MOV_IMM_F32: {
2120 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
2121 uint64_t ImmValue = 0;
2122
2123
2124 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
2125 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
2126 float FloatValue = FPC->getValueAPF().convertToFloat();
2127 if (FloatValue == 0.0) {
2128 ImmReg = AMDGPU::ZERO;
2129 } else if (FloatValue == 0.5) {
2130 ImmReg = AMDGPU::HALF;
2131 } else if (FloatValue == 1.0) {
2132 ImmReg = AMDGPU::ONE;
2133 } else {
2134 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2135 }
2136 } else {
2137 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
2138 uint64_t Value = C->getZExtValue();
2139 if (Value == 0) {
2140 ImmReg = AMDGPU::ZERO;
2141 } else if (Value == 1) {
2142 ImmReg = AMDGPU::ONE_INT;
2143 } else {
2144 ImmValue = Value;
2145 }
2146 }
2147
2148 // Check that we aren't already using an immediate.
2149 // XXX: It's possible for an instruction to have more than one
2150 // immediate operand, but this is not supported yet.
2151 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
2152 if (!Imm.getNode())
2153 return false;
2154 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
2155 assert(C);
2156 if (C->getZExtValue())
2157 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002158 Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002159 }
2160 Src = DAG.getRegister(ImmReg, MVT::i32);
2161 return true;
2162 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002163 default:
2164 return false;
2165 }
2166}
2167
2168
2169/// \brief Fold the instructions after selecting them
2170SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2171 SelectionDAG &DAG) const {
Eric Christopherfc6de422014-08-05 02:39:49 +00002172 const R600InstrInfo *TII =
2173 static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002174 if (!Node->isMachineOpcode())
2175 return Node;
2176 unsigned Opcode = Node->getMachineOpcode();
2177 SDValue FakeOp;
2178
Benjamin Kramer6cd780f2015-02-17 15:29:18 +00002179 std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002180
2181 if (Opcode == AMDGPU::DOT_4) {
2182 int OperandIdx[] = {
2183 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2184 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2185 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2186 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2187 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2188 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2189 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2190 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00002191 };
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002192 int NegIdx[] = {
2193 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
2194 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
2195 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
2196 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
2197 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
2198 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
2199 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
2200 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
2201 };
2202 int AbsIdx[] = {
2203 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
2204 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
2205 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
2206 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
2207 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
2208 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
2209 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
2210 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
2211 };
2212 for (unsigned i = 0; i < 8; i++) {
2213 if (OperandIdx[i] < 0)
2214 return Node;
2215 SDValue &Src = Ops[OperandIdx[i] - 1];
2216 SDValue &Neg = Ops[NegIdx[i] - 1];
2217 SDValue &Abs = Ops[AbsIdx[i] - 1];
2218 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2219 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2220 if (HasDst)
2221 SelIdx--;
2222 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002223 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2224 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2225 }
2226 } else if (Opcode == AMDGPU::REG_SEQUENCE) {
2227 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2228 SDValue &Src = Ops[i];
2229 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002230 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2231 }
Vincent Lejeune0167a312013-09-12 23:45:00 +00002232 } else if (Opcode == AMDGPU::CLAMP_R600) {
2233 SDValue Src = Node->getOperand(0);
2234 if (!Src.isMachineOpcode() ||
2235 !TII->hasInstrModifiers(Src.getMachineOpcode()))
2236 return Node;
2237 int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
2238 AMDGPU::OpName::clamp);
2239 if (ClampIdx < 0)
2240 return Node;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002241 SDLoc DL(Node);
Benjamin Kramer6cd780f2015-02-17 15:29:18 +00002242 std::vector<SDValue> Ops(Src->op_begin(), Src->op_end());
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002243 Ops[ClampIdx - 1] = DAG.getTargetConstant(1, DL, MVT::i32);
2244 return DAG.getMachineNode(Src.getMachineOpcode(), DL,
2245 Node->getVTList(), Ops);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002246 } else {
2247 if (!TII->hasInstrModifiers(Opcode))
2248 return Node;
2249 int OperandIdx[] = {
2250 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2251 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2252 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
2253 };
2254 int NegIdx[] = {
2255 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
2256 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
2257 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
2258 };
2259 int AbsIdx[] = {
2260 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
2261 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
2262 -1
2263 };
2264 for (unsigned i = 0; i < 3; i++) {
2265 if (OperandIdx[i] < 0)
2266 return Node;
2267 SDValue &Src = Ops[OperandIdx[i] - 1];
2268 SDValue &Neg = Ops[NegIdx[i] - 1];
2269 SDValue FakeAbs;
2270 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2271 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2272 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002273 int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
2274 if (HasDst) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002275 SelIdx--;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002276 ImmIdx--;
2277 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002278 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002279 SDValue &Imm = Ops[ImmIdx];
2280 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002281 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2282 }
2283 }
2284
2285 return Node;
2286}