blob: 5699941d735e292c8b6a2e338622aa42f5956c6a [file] [log] [blame]
Tom Stellard75aadc22012-12-11 21:25:42 +00001//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Custom DAG lowering for R600
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000016#include "AMDGPUFrameLowering.h"
Matt Arsenaultc791f392014-06-23 18:00:31 +000017#include "AMDGPUIntrinsicInfo.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000018#include "AMDGPUSubtarget.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000019#include "R600Defines.h"
20#include "R600InstrInfo.h"
21#include "R600MachineFunctionInfo.h"
Tom Stellard067c8152014-07-21 14:01:14 +000022#include "llvm/Analysis/ValueTracking.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000023#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000024#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000025#include "llvm/CodeGen/MachineInstrBuilder.h"
26#include "llvm/CodeGen/MachineRegisterInfo.h"
27#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000028#include "llvm/IR/Argument.h"
29#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000030
31using namespace llvm;
32
Eric Christopher7792e322015-01-30 23:24:40 +000033R600TargetLowering::R600TargetLowering(TargetMachine &TM,
34 const AMDGPUSubtarget &STI)
35 : AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000036 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
37 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
38 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
39 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000040 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
41 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
42
Eric Christopher23a3a7c2015-02-26 00:00:24 +000043 computeRegisterProperties(STI.getRegisterInfo());
Tom Stellard75aadc22012-12-11 21:25:42 +000044
Tom Stellard0351ea22013-09-28 02:50:50 +000045 // Set condition code actions
46 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
47 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000048 setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000049 setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000050 setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
51 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000052 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
53 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
54 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
55 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000056 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
57 setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
58
59 setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
60 setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
61 setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
62 setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
63
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000064 setOperationAction(ISD::FCOS, MVT::f32, Custom);
65 setOperationAction(ISD::FSIN, MVT::f32, Custom);
66
Tom Stellard75aadc22012-12-11 21:25:42 +000067 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000068 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000069
Tom Stellard492ebea2013-03-08 15:37:07 +000070 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
71 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Matt Arsenault1d555c42014-06-23 18:00:55 +000072 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000073
74 setOperationAction(ISD::FSUB, MVT::f32, Expand);
75
76 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
77 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
78 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000079
Tom Stellard75aadc22012-12-11 21:25:42 +000080 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
81 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
82
Tom Stellarde8f9f282013-03-08 15:37:05 +000083 setOperationAction(ISD::SETCC, MVT::i32, Expand);
84 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000085 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
Jan Vesely2cb62ce2014-07-10 22:40:21 +000086 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
87 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000088
Tom Stellard53f2f902013-09-05 18:38:03 +000089 setOperationAction(ISD::SELECT, MVT::i32, Expand);
90 setOperationAction(ISD::SELECT, MVT::f32, Expand);
91 setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
Tom Stellard53f2f902013-09-05 18:38:03 +000092 setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000093
Jan Vesely808fff52015-04-30 17:15:56 +000094 // ADD, SUB overflow.
95 // TODO: turn these into Legal?
96 if (Subtarget->hasCARRY())
97 setOperationAction(ISD::UADDO, MVT::i32, Custom);
98
99 if (Subtarget->hasBORROW())
100 setOperationAction(ISD::USUBO, MVT::i32, Custom);
101
Matt Arsenault4e466652014-04-16 01:41:30 +0000102 // Expand sign extension of vectors
103 if (!Subtarget->hasBFE())
104 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
105
106 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
107 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);
108
109 if (!Subtarget->hasBFE())
110 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
111 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
112 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);
113
114 if (!Subtarget->hasBFE())
115 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
116 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
117 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);
118
119 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
120 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
121 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);
122
123 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
124
125
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000126 // Legalize loads and stores to the private address space.
127 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000128 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000129 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000130
131 // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
132 // spaces, so it is custom lowered to handle those where it isn't.
Ahmed Bougacha2b6917b2015-01-08 00:51:32 +0000133 for (MVT VT : MVT::integer_valuetypes()) {
134 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
135 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
136 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);
Matt Arsenault2a495972014-11-23 02:57:54 +0000137
Ahmed Bougacha2b6917b2015-01-08 00:51:32 +0000138 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
139 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
140 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);
Matt Arsenault2a495972014-11-23 02:57:54 +0000141
Ahmed Bougacha2b6917b2015-01-08 00:51:32 +0000142 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
143 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
144 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
145 }
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000146
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000147 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000148 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000149 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000150 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
Tom Stellardd3ee8c12013-08-16 01:12:06 +0000151 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
152 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000153
Tom Stellard365366f2013-01-23 02:09:06 +0000154 setOperationAction(ISD::LOAD, MVT::i32, Custom);
155 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000156 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
157
Tom Stellard880a80a2014-06-17 16:53:14 +0000158 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
159 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
160 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
161 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
162
163 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
164 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
165 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
166 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
167
Tom Stellard75aadc22012-12-11 21:25:42 +0000168 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +0000169 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +0000170 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +0000171 setTargetDAGCombine(ISD::SELECT_CC);
Quentin Colombete2e05482013-07-30 00:27:16 +0000172 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000173
Jan Vesely25f36272014-06-18 12:27:13 +0000174 // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
175 // to be Legal/Custom in order to avoid library calls.
176 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
Jan Vesely900ff2e2014-06-18 12:27:15 +0000177 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
Jan Veselyecf51332014-06-18 12:27:17 +0000178 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
Jan Vesely25f36272014-06-18 12:27:13 +0000179
Michel Danzer49812b52013-07-10 16:37:07 +0000180 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
181
Matt Arsenaultc4d3d3a2014-06-23 18:00:49 +0000182 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
183 for (MVT VT : ScalarIntVTs) {
184 setOperationAction(ISD::ADDC, VT, Expand);
185 setOperationAction(ISD::SUBC, VT, Expand);
186 setOperationAction(ISD::ADDE, VT, Expand);
187 setOperationAction(ISD::SUBE, VT, Expand);
188 }
189
Tom Stellardfc455472013-08-12 22:33:21 +0000190 setSchedulingPreference(Sched::Source);
Tom Stellard75aadc22012-12-11 21:25:42 +0000191}
192
// Expand R600 pseudo instructions that were tagged usesCustomInserter.
// Each case builds the real machine instruction(s) at the insertion point I;
// unless a case returns BB early, the original pseudo MI is erased at the end.
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(Subtarget->getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // Replace LDS_*_RET instruction that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI->getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
      // LDS_1A2D support and remove this special case.
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()) ||
          MI->getOpcode() == AMDGPU::LDS_CMPST_RET)
        return BB;

      // Rebuild as the NORET form, copying all operands except the dst.
      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  // CLAMP/FABS/FNEG become a plain MOV carrying the matching modifier flag.
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                   AMDGPU::MOV,
                                                   MI->getOperand(0).getReg(),
                                                   MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    // Sets the MASK flag on the instruction that defines the masked register;
    // no new instruction is emitted for the MASK_WRITE pseudo itself.
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    // Materialize the float immediate by its raw bit pattern.
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                     .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    // MOV from the ALU constant file; the constant slot is encoded in the
    // src0_sel immediate operand.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
        MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    // Fold the end-of-program bit in when this write is immediately followed
    // by a RETURN.
    unsigned EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    // Texture sample with explicit derivatives: emit SET_GRADIENTS_H/V from
    // operands 3 and 2, then the gradient sample reading operand 1.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    // Adjust source swizzle and coordinate-type bits per texture target.
    // NOTE(review): the TextureId -> target mapping (5 = Rect, 6 = Shadow1D,
    // ...) is presumably fixed by the frontend intrinsic encoding — confirm
    // against the intrinsic definition if changing.
    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    // Implicit uses of T0/T1 keep the gradient setup alive and ordered
    // before the sample.
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    // Same expansion as TXD, but samples with the shadow-compare variant
    // TEX_SAMPLE_C_G.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    // Unconditional branch lowers to a plain JUMP.
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    // Compute the predicate bit with PRED_X (float non-zero test), then
    // emit a JUMP_COND consuming it.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    // Same as the f32 case but with the integer non-zero comparison.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
            AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO_INT)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
           .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if its not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // NOTE(review): 84/40 appear to be the per-generation CF export opcodes
    // (Evergreen vs. R600) — confirm against the ISA documentation.
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  MI->eraseFromParent();
  return BB;
}
572
573//===----------------------------------------------------------------------===//
574// Custom DAG Lowering Operations
575//===----------------------------------------------------------------------===//
576
Tom Stellard75aadc22012-12-11 21:25:42 +0000577SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000578 MachineFunction &MF = DAG.getMachineFunction();
579 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard75aadc22012-12-11 21:25:42 +0000580 switch (Op.getOpcode()) {
581 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Tom Stellard880a80a2014-06-17 16:53:14 +0000582 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
583 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
Jan Vesely25f36272014-06-18 12:27:13 +0000584 case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
Jan Veselyecf51332014-06-18 12:27:17 +0000585 case ISD::SRA_PARTS:
Jan Vesely900ff2e2014-06-18 12:27:15 +0000586 case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
Jan Vesely808fff52015-04-30 17:15:56 +0000587 case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
588 case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000589 case ISD::FCOS:
590 case ISD::FSIN: return LowerTrig(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000591 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000592 case ISD::STORE: return LowerSTORE(Op, DAG);
Matt Arsenaultd2c9e082014-07-07 18:34:45 +0000593 case ISD::LOAD: {
594 SDValue Result = LowerLOAD(Op, DAG);
595 assert((!Result.getNode() ||
596 Result.getNode()->getNumValues() == 2) &&
597 "Load should return a value and a chain");
598 return Result;
599 }
600
Matt Arsenault1d555c42014-06-23 18:00:55 +0000601 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000602 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000603 case ISD::INTRINSIC_VOID: {
604 SDValue Chain = Op.getOperand(0);
605 unsigned IntrinsicID =
606 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
607 switch (IntrinsicID) {
608 case AMDGPUIntrinsic::AMDGPU_store_output: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000609 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
610 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000611 MFI->LiveOuts.push_back(Reg);
Andrew Trickef9de2a2013-05-25 02:42:55 +0000612 return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
Tom Stellard75aadc22012-12-11 21:25:42 +0000613 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000614 case AMDGPUIntrinsic::R600_store_swizzle: {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000615 SDLoc DL(Op);
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000616 const SDValue Args[8] = {
617 Chain,
618 Op.getOperand(2), // Export Value
619 Op.getOperand(3), // ArrayBase
620 Op.getOperand(4), // Type
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000621 DAG.getConstant(0, DL, MVT::i32), // SWZ_X
622 DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
623 DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
624 DAG.getConstant(3, DL, MVT::i32) // SWZ_W
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000625 };
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000626 return DAG.getNode(AMDGPUISD::EXPORT, DL, Op.getValueType(), Args);
Tom Stellard75aadc22012-12-11 21:25:42 +0000627 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000628
Tom Stellard75aadc22012-12-11 21:25:42 +0000629 // default for switch(IntrinsicID)
630 default: break;
631 }
632 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
633 break;
634 }
635 case ISD::INTRINSIC_WO_CHAIN: {
636 unsigned IntrinsicID =
637 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
638 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000639 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000640 switch(IntrinsicID) {
641 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000642 case AMDGPUIntrinsic::R600_load_input: {
643 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
644 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
645 MachineFunction &MF = DAG.getMachineFunction();
646 MachineRegisterInfo &MRI = MF.getRegInfo();
647 MRI.addLiveIn(Reg);
648 return DAG.getCopyFromReg(DAG.getEntryNode(),
649 SDLoc(DAG.getEntryNode()), Reg, VT);
650 }
651
652 case AMDGPUIntrinsic::R600_interp_input: {
653 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
654 int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
655 MachineSDNode *interp;
656 if (ijb < 0) {
Eric Christopher7792e322015-01-30 23:24:40 +0000657 const R600InstrInfo *TII =
658 static_cast<const R600InstrInfo *>(Subtarget->getInstrInfo());
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000659 interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000660 MVT::v4f32, DAG.getTargetConstant(slot / 4, DL, MVT::i32));
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000661 return DAG.getTargetExtractSubreg(
662 TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
663 DL, MVT::f32, SDValue(interp, 0));
664 }
665 MachineFunction &MF = DAG.getMachineFunction();
666 MachineRegisterInfo &MRI = MF.getRegInfo();
667 unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
668 unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
669 MRI.addLiveIn(RegisterI);
670 MRI.addLiveIn(RegisterJ);
671 SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
672 SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
673 SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
674 SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);
675
676 if (slot % 4 < 2)
677 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000678 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4, DL, MVT::i32),
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000679 RegisterJNode, RegisterINode);
680 else
681 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000682 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4, DL, MVT::i32),
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000683 RegisterJNode, RegisterINode);
684 return SDValue(interp, slot % 2);
685 }
Vincent Lejeunef143af32013-11-11 22:10:24 +0000686 case AMDGPUIntrinsic::R600_interp_xy:
687 case AMDGPUIntrinsic::R600_interp_zw: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000688 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Tom Stellard41afe6a2013-02-05 17:09:14 +0000689 MachineSDNode *interp;
Vincent Lejeunef143af32013-11-11 22:10:24 +0000690 SDValue RegisterINode = Op.getOperand(2);
691 SDValue RegisterJNode = Op.getOperand(3);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000692
Vincent Lejeunef143af32013-11-11 22:10:24 +0000693 if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
Tom Stellard41afe6a2013-02-05 17:09:14 +0000694 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000695 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000696 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000697 else
698 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000699 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000700 RegisterJNode, RegisterINode);
Vincent Lejeunef143af32013-11-11 22:10:24 +0000701 return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
702 SDValue(interp, 0), SDValue(interp, 1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000703 }
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000704 case AMDGPUIntrinsic::R600_tex:
705 case AMDGPUIntrinsic::R600_texc:
706 case AMDGPUIntrinsic::R600_txl:
707 case AMDGPUIntrinsic::R600_txlc:
708 case AMDGPUIntrinsic::R600_txb:
709 case AMDGPUIntrinsic::R600_txbc:
710 case AMDGPUIntrinsic::R600_txf:
711 case AMDGPUIntrinsic::R600_txq:
712 case AMDGPUIntrinsic::R600_ddx:
Vincent Lejeune6df39432013-10-02 16:00:33 +0000713 case AMDGPUIntrinsic::R600_ddy:
714 case AMDGPUIntrinsic::R600_ldptr: {
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000715 unsigned TextureOp;
716 switch (IntrinsicID) {
717 case AMDGPUIntrinsic::R600_tex:
718 TextureOp = 0;
719 break;
720 case AMDGPUIntrinsic::R600_texc:
721 TextureOp = 1;
722 break;
723 case AMDGPUIntrinsic::R600_txl:
724 TextureOp = 2;
725 break;
726 case AMDGPUIntrinsic::R600_txlc:
727 TextureOp = 3;
728 break;
729 case AMDGPUIntrinsic::R600_txb:
730 TextureOp = 4;
731 break;
732 case AMDGPUIntrinsic::R600_txbc:
733 TextureOp = 5;
734 break;
735 case AMDGPUIntrinsic::R600_txf:
736 TextureOp = 6;
737 break;
738 case AMDGPUIntrinsic::R600_txq:
739 TextureOp = 7;
740 break;
741 case AMDGPUIntrinsic::R600_ddx:
742 TextureOp = 8;
743 break;
744 case AMDGPUIntrinsic::R600_ddy:
745 TextureOp = 9;
746 break;
Vincent Lejeune6df39432013-10-02 16:00:33 +0000747 case AMDGPUIntrinsic::R600_ldptr:
748 TextureOp = 10;
749 break;
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000750 default:
751 llvm_unreachable("Unknow Texture Operation");
752 }
753
754 SDValue TexArgs[19] = {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000755 DAG.getConstant(TextureOp, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000756 Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000757 DAG.getConstant(0, DL, MVT::i32),
758 DAG.getConstant(1, DL, MVT::i32),
759 DAG.getConstant(2, DL, MVT::i32),
760 DAG.getConstant(3, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000761 Op.getOperand(2),
762 Op.getOperand(3),
763 Op.getOperand(4),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000764 DAG.getConstant(0, DL, MVT::i32),
765 DAG.getConstant(1, DL, MVT::i32),
766 DAG.getConstant(2, DL, MVT::i32),
767 DAG.getConstant(3, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000768 Op.getOperand(5),
769 Op.getOperand(6),
770 Op.getOperand(7),
771 Op.getOperand(8),
772 Op.getOperand(9),
773 Op.getOperand(10)
774 };
Craig Topper48d114b2014-04-26 18:35:24 +0000775 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000776 }
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000777 case AMDGPUIntrinsic::AMDGPU_dp4: {
778 SDValue Args[8] = {
779 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000780 DAG.getConstant(0, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000781 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000782 DAG.getConstant(0, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000783 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000784 DAG.getConstant(1, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000785 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000786 DAG.getConstant(1, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000787 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000788 DAG.getConstant(2, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000789 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000790 DAG.getConstant(2, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000791 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000792 DAG.getConstant(3, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000793 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000794 DAG.getConstant(3, DL, MVT::i32))
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000795 };
Craig Topper48d114b2014-04-26 18:35:24 +0000796 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000797 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000798
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000799 case Intrinsic::r600_read_ngroups_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000800 return LowerImplicitParameter(DAG, VT, DL, 0);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000801 case Intrinsic::r600_read_ngroups_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000802 return LowerImplicitParameter(DAG, VT, DL, 1);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000803 case Intrinsic::r600_read_ngroups_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000804 return LowerImplicitParameter(DAG, VT, DL, 2);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000805 case Intrinsic::r600_read_global_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000806 return LowerImplicitParameter(DAG, VT, DL, 3);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000807 case Intrinsic::r600_read_global_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000808 return LowerImplicitParameter(DAG, VT, DL, 4);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000809 case Intrinsic::r600_read_global_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000810 return LowerImplicitParameter(DAG, VT, DL, 5);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000811 case Intrinsic::r600_read_local_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000812 return LowerImplicitParameter(DAG, VT, DL, 6);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000813 case Intrinsic::r600_read_local_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000814 return LowerImplicitParameter(DAG, VT, DL, 7);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000815 case Intrinsic::r600_read_local_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000816 return LowerImplicitParameter(DAG, VT, DL, 8);
817
Tom Stellarddcb9f092015-07-09 21:20:37 +0000818 case Intrinsic::AMDGPU_read_workdim: {
819 uint32_t ByteOffset = getImplicitParameterOffset(MFI, GRID_DIM);
820 return LowerImplicitParameter(DAG, VT, DL, ByteOffset / 4);
821 }
Jan Veselye5121f32014-10-14 20:05:26 +0000822
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000823 case Intrinsic::r600_read_tgid_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000824 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
825 AMDGPU::T1_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000826 case Intrinsic::r600_read_tgid_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000827 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
828 AMDGPU::T1_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000829 case Intrinsic::r600_read_tgid_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000830 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
831 AMDGPU::T1_Z, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000832 case Intrinsic::r600_read_tidig_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000833 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
834 AMDGPU::T0_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000835 case Intrinsic::r600_read_tidig_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000836 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
837 AMDGPU::T0_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000838 case Intrinsic::r600_read_tidig_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000839 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
840 AMDGPU::T0_Z, VT);
Matt Arsenault257d48d2014-06-24 22:13:39 +0000841 case Intrinsic::AMDGPU_rsq:
842 // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
843 return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
Marek Olsak43650e42015-03-24 13:40:08 +0000844
845 case AMDGPUIntrinsic::AMDGPU_fract:
846 case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
847 return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000848 }
849 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
850 break;
851 }
852 } // end switch(Op.getOpcode())
853 return SDValue();
854}
855
/// Replace the illegal-typed results of node \p N with legal equivalents,
/// appending the replacement values to \p Results.
void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    // Anything not special-cased here is handled by the shared AMDGPU logic.
    AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
    return;
  case ISD::FP_TO_UINT:
    if (N->getValueType(0) == MVT::i1) {
      // An i1 result only distinguishes zero from non-zero; LowerFPTOUINT
      // emits a single "!= 0.0" comparison.
      Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
      return;
    }
    // Fall-through. Since we don't care about out of bounds values
    // we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint
    // considers some extra cases which are not necessary here.
  case ISD::FP_TO_SINT: {
    SDValue Result;
    if (expandFP_TO_SINT(N, Result, DAG))
      Results.push_back(Result);
    return;
  }
  case ISD::SDIVREM: {
    SDValue Op = SDValue(N, 1);
    SDValue RES = LowerSDIVREM(Op, DAG);
    // LowerSDIVREM returns a two-result node: quotient then remainder.
    Results.push_back(RES);
    Results.push_back(RES.getValue(1));
    break;
  }
  case ISD::UDIVREM: {
    SDValue Op = SDValue(N, 0);
    // LowerUDIVREM64 pushes both replacement values onto Results itself.
    LowerUDIVREM64(Op, DAG, Results);
    break;
  }
  }
}
891
Tom Stellard880a80a2014-06-17 16:53:14 +0000892SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
893 SDValue Vector) const {
894
895 SDLoc DL(Vector);
896 EVT VecVT = Vector.getValueType();
897 EVT EltVT = VecVT.getVectorElementType();
898 SmallVector<SDValue, 8> Args;
899
900 for (unsigned i = 0, e = VecVT.getVectorNumElements();
901 i != e; ++i) {
Mehdi Amini44ede332015-07-09 02:09:04 +0000902 Args.push_back(DAG.getNode(
903 ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
904 DAG.getConstant(i, DL, getVectorIdxTy(DAG.getDataLayout()))));
Tom Stellard880a80a2014-06-17 16:53:14 +0000905 }
906
907 return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
908}
909
910SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
911 SelectionDAG &DAG) const {
912
913 SDLoc DL(Op);
914 SDValue Vector = Op.getOperand(0);
915 SDValue Index = Op.getOperand(1);
916
917 if (isa<ConstantSDNode>(Index) ||
918 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
919 return Op;
920
921 Vector = vectorToVerticalVector(DAG, Vector);
922 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
923 Vector, Index);
924}
925
926SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
927 SelectionDAG &DAG) const {
928 SDLoc DL(Op);
929 SDValue Vector = Op.getOperand(0);
930 SDValue Value = Op.getOperand(1);
931 SDValue Index = Op.getOperand(2);
932
933 if (isa<ConstantSDNode>(Index) ||
934 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
935 return Op;
936
937 Vector = vectorToVerticalVector(DAG, Vector);
938 SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
939 Vector, Value, Index);
940 return vectorToVerticalVector(DAG, Insert);
941}
942
/// Lower FSIN/FCOS to R600's hardware SIN_HW/COS_HW nodes, range-reducing
/// the argument into the interval the hardware expects.
SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
  // On hw >= R700, COS/SIN input must be between -1. and 1.
  // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
  EVT VT = Op.getValueType();
  SDValue Arg = Op.getOperand(0);
  SDLoc DL(Op);

  // TODO: Should this propagate fast-math-flags?

  // 0.15915494309 is 1/(2*Pi): scale the argument into units of full turns,
  // bias by 0.5 so FRACT wraps the value into [0, 1).
  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT,
        DAG.getNode(ISD::FMUL, DL, VT, Arg,
          DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
        DAG.getConstantFP(0.5, DL, MVT::f32)));
  unsigned TrigNode;
  switch (Op.getOpcode()) {
  case ISD::FCOS:
    TrigNode = AMDGPUISD::COS_HW;
    break;
  case ISD::FSIN:
    TrigNode = AMDGPUISD::SIN_HW;
    break;
  default:
    llvm_unreachable("Wrong trig opcode");
  }
  // Undo the +0.5 bias so the hardware sees a value in [-0.5, 0.5).
  SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT, FractPart,
        DAG.getConstantFP(-0.5, DL, MVT::f32)));
  if (Gen >= AMDGPUSubtarget::R700)
    return TrigVal;
  // On R600 hw, COS/SIN input must be between -Pi and Pi.
  return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
      DAG.getConstantFP(3.14159265359, DL, MVT::f32));
}
976
/// Lower SHL_PARTS: a double-width shift-left expressed on a (Lo, Hi) pair
/// of VT-sized values. Returns the shifted pair as a MERGE_VALUES node
/// (result 0 = Lo, result 1 = Hi).
SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One  = DAG.getConstant(1, DL, VT);

  SDValue Width  = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  SDValue BigShift  = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps, the alternative is to
  // add a conditional to filter the special case.

  // Overflow = bits of Lo that cross into Hi, i.e. Lo >> (Width - Shift),
  // computed as (Lo >> (Width1 - Shift)) >> 1 so the shift amount never
  // reaches the (undefined) full register width when Shift == 0.
  SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
  Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);

  // Result for Shift < Width (the "small" shift).
  SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
  HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
  SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);

  // Result for Shift >= Width: Lo shifts entirely into Hi, Lo becomes zero.
  SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
  SDValue LoBig = Zero;

  // Select between the two cases based on the shift amount.
  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
}
1012
/// Lower SRA_PARTS/SRL_PARTS: a double-width right shift (arithmetic or
/// logical, keyed off the opcode) expressed on a (Lo, Hi) pair of VT-sized
/// values. Returns the shifted pair as a MERGE_VALUES node
/// (result 0 = Lo, result 1 = Hi).
SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One  = DAG.getConstant(1, DL, VT);

  // SRA fills vacated high bits with the sign bit; SRL fills with zeros.
  const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;

  SDValue Width  = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  SDValue BigShift  = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps, the alternative is to
  // add a conditional to filter the special case.

  // Overflow = bits of Hi that cross into Lo, i.e. Hi << (Width - Shift),
  // computed as (Hi << (Width1 - Shift)) << 1 so the shift amount never
  // reaches the (undefined) full register width when Shift == 0.
  SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
  Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);

  // Result for Shift < Width (the "small" shift).
  SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
  SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
  LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);

  // Result for Shift >= Width: Hi shifts entirely into Lo; Hi becomes the
  // sign extension (SRA) or zero (SRL).
  SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
  SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;

  // Select between the two cases based on the shift amount.
  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
}
1050
Jan Vesely808fff52015-04-30 17:15:56 +00001051SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
1052 unsigned mainop, unsigned ovf) const {
1053 SDLoc DL(Op);
1054 EVT VT = Op.getValueType();
1055
1056 SDValue Lo = Op.getOperand(0);
1057 SDValue Hi = Op.getOperand(1);
1058
1059 SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
1060 // Extend sign.
1061 OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
1062 DAG.getValueType(MVT::i1));
1063
1064 SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);
1065
1066 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
1067}
1068
Tom Stellard75aadc22012-12-11 21:25:42 +00001069SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001070 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001071 return DAG.getNode(
1072 ISD::SETCC,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001073 DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001074 MVT::i1,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001075 Op, DAG.getConstantFP(0.0f, DL, MVT::f32),
Tom Stellard75aadc22012-12-11 21:25:42 +00001076 DAG.getCondCode(ISD::SETNE)
1077 );
1078}
1079
Tom Stellard75aadc22012-12-11 21:25:42 +00001080SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001081 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001082 unsigned DwordOffset) const {
1083 unsigned ByteOffset = DwordOffset * 4;
1084 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +00001085 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +00001086
1087 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
1088 assert(isInt<16>(ByteOffset));
1089
1090 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001091 DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
Tom Stellard75aadc22012-12-11 21:25:42 +00001092 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
1093 false, false, false, 0);
1094}
1095
Tom Stellard75aadc22012-12-11 21:25:42 +00001096bool R600TargetLowering::isZero(SDValue Op) const {
1097 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
1098 return Cst->isNullValue();
1099 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
1100 return CstFP->isZero();
1101 } else {
1102 return false;
1103 }
1104}
1105
/// Lower SELECT_CC, trying first to massage the operands into a form one of
/// the native SET* or CND* instructions can match, and falling back to a
/// two-step SELECT_CC expansion otherwise.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // For f32, first see if the whole select collapses to a legacy min/max.
  if (VT == MVT::f32) {
    DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
    SDValue MinMax = CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
    if (MinMax)
      return MinMax;
  }

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  // If True/False are reversed (hardware-false in the True slot), invert the
  // condition — or invert-and-swap the compare operands — so they line up.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
     ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        // Inverting the condition requires swapping True/False as well.
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    // CND* has no "not equal" form: rewrite NE-style compares against zero
    // as the inverse condition with True/False swapped.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, DL, CompareVT);
    HWFalse = DAG.getConstant(0, DL, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
1247
Alp Tokercb402912014-01-24 17:20:08 +00001248/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001249/// convert these pointers to a register index. Each register holds
1250/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
1251/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
1252/// for indirect addressing.
1253SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1254 unsigned StackWidth,
1255 SelectionDAG &DAG) const {
1256 unsigned SRLPad;
1257 switch(StackWidth) {
1258 case 1:
1259 SRLPad = 2;
1260 break;
1261 case 2:
1262 SRLPad = 3;
1263 break;
1264 case 4:
1265 SRLPad = 4;
1266 break;
1267 default: llvm_unreachable("Invalid stack width");
1268 }
1269
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001270 SDLoc DL(Ptr);
1271 return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
1272 DAG.getConstant(SRLPad, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001273}
1274
1275void R600TargetLowering::getStackAddress(unsigned StackWidth,
1276 unsigned ElemIdx,
1277 unsigned &Channel,
1278 unsigned &PtrIncr) const {
1279 switch (StackWidth) {
1280 default:
1281 case 1:
1282 Channel = 0;
1283 if (ElemIdx > 0) {
1284 PtrIncr = 1;
1285 } else {
1286 PtrIncr = 0;
1287 }
1288 break;
1289 case 2:
1290 Channel = ElemIdx % 2;
1291 if (ElemIdx == 2) {
1292 PtrIncr = 1;
1293 } else {
1294 PtrIncr = 0;
1295 }
1296 break;
1297 case 4:
1298 Channel = ElemIdx;
1299 PtrIncr = 0;
1300 break;
1301 }
1302}
1303
Tom Stellard75aadc22012-12-11 21:25:42 +00001304SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001305 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001306 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1307 SDValue Chain = Op.getOperand(0);
1308 SDValue Value = Op.getOperand(1);
1309 SDValue Ptr = Op.getOperand(2);
1310
Tom Stellard2ffc3302013-08-26 15:05:44 +00001311 SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
Tom Stellardfbab8272013-08-16 01:12:11 +00001312 if (Result.getNode()) {
1313 return Result;
1314 }
1315
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001316 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
1317 if (StoreNode->isTruncatingStore()) {
1318 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001319 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001320 EVT MemVT = StoreNode->getMemoryVT();
1321 SDValue MaskConstant;
1322 if (MemVT == MVT::i8) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001323 MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001324 } else {
1325 assert(MemVT == MVT::i16);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001326 MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001327 }
1328 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001329 DAG.getConstant(2, DL, MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001330 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001331 DAG.getConstant(0x00000003, DL, VT));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001332 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1333 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001334 DAG.getConstant(3, DL, VT));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001335 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1336 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
1337 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1338 // vector instead.
1339 SDValue Src[4] = {
1340 ShiftedValue,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001341 DAG.getConstant(0, DL, MVT::i32),
1342 DAG.getConstant(0, DL, MVT::i32),
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001343 Mask
1344 };
Craig Topper48d114b2014-04-26 18:35:24 +00001345 SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001346 SDValue Args[3] = { Chain, Input, DWordAddr };
1347 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
Craig Topper206fcd42014-04-26 19:29:41 +00001348 Op->getVTList(), Args, MemVT,
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001349 StoreNode->getMemOperand());
1350 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
1351 Value.getValueType().bitsGE(MVT::i32)) {
1352 // Convert pointer from byte address to dword address.
1353 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1354 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001355 Ptr, DAG.getConstant(2, DL, MVT::i32)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001356
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001357 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
Matt Arsenaulteaa3a7e2013-12-10 21:37:42 +00001358 llvm_unreachable("Truncated and indexed stores not supported yet");
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001359 } else {
1360 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1361 }
1362 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001363 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001364 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001365
1366 EVT ValueVT = Value.getValueType();
1367
1368 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1369 return SDValue();
1370 }
1371
Tom Stellarde9373602014-01-22 19:24:14 +00001372 SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
1373 if (Ret.getNode()) {
1374 return Ret;
1375 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001376 // Lowering for indirect addressing
1377
1378 const MachineFunction &MF = DAG.getMachineFunction();
Eric Christopher7792e322015-01-30 23:24:40 +00001379 const AMDGPUFrameLowering *TFL =
1380 static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001381 unsigned StackWidth = TFL->getStackWidth(MF);
1382
1383 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1384
1385 if (ValueVT.isVector()) {
1386 unsigned NumElemVT = ValueVT.getVectorNumElements();
1387 EVT ElemVT = ValueVT.getVectorElementType();
Craig Topper48d114b2014-04-26 18:35:24 +00001388 SmallVector<SDValue, 4> Stores(NumElemVT);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001389
1390 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1391 "vector width in load");
1392
1393 for (unsigned i = 0; i < NumElemVT; ++i) {
1394 unsigned Channel, PtrIncr;
1395 getStackAddress(StackWidth, i, Channel, PtrIncr);
1396 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001397 DAG.getConstant(PtrIncr, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001398 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001399 Value, DAG.getConstant(i, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001400
1401 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1402 Chain, Elem, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001403 DAG.getTargetConstant(Channel, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001404 }
Craig Topper48d114b2014-04-26 18:35:24 +00001405 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001406 } else {
1407 if (ValueVT == MVT::i8) {
1408 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1409 }
1410 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001411 DAG.getTargetConstant(0, DL, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001412 }
1413
1414 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001415}
1416
Tom Stellard365366f2013-01-23 02:09:06 +00001417// return (512 + (kc_bank << 12)
1418static int
1419ConstantAddressBlock(unsigned AddressSpace) {
1420 switch (AddressSpace) {
1421 case AMDGPUAS::CONSTANT_BUFFER_0:
1422 return 512;
1423 case AMDGPUAS::CONSTANT_BUFFER_1:
1424 return 512 + 4096;
1425 case AMDGPUAS::CONSTANT_BUFFER_2:
1426 return 512 + 4096 * 2;
1427 case AMDGPUAS::CONSTANT_BUFFER_3:
1428 return 512 + 4096 * 3;
1429 case AMDGPUAS::CONSTANT_BUFFER_4:
1430 return 512 + 4096 * 4;
1431 case AMDGPUAS::CONSTANT_BUFFER_5:
1432 return 512 + 4096 * 5;
1433 case AMDGPUAS::CONSTANT_BUFFER_6:
1434 return 512 + 4096 * 6;
1435 case AMDGPUAS::CONSTANT_BUFFER_7:
1436 return 512 + 4096 * 7;
1437 case AMDGPUAS::CONSTANT_BUFFER_8:
1438 return 512 + 4096 * 8;
1439 case AMDGPUAS::CONSTANT_BUFFER_9:
1440 return 512 + 4096 * 9;
1441 case AMDGPUAS::CONSTANT_BUFFER_10:
1442 return 512 + 4096 * 10;
1443 case AMDGPUAS::CONSTANT_BUFFER_11:
1444 return 512 + 4096 * 11;
1445 case AMDGPUAS::CONSTANT_BUFFER_12:
1446 return 512 + 4096 * 12;
1447 case AMDGPUAS::CONSTANT_BUFFER_13:
1448 return 512 + 4096 * 13;
1449 case AMDGPUAS::CONSTANT_BUFFER_14:
1450 return 512 + 4096 * 14;
1451 case AMDGPUAS::CONSTANT_BUFFER_15:
1452 return 512 + 4096 * 15;
1453 default:
1454 return -1;
1455 }
1456}
1457
/// Custom lowering for ISD::LOAD on R600.
///
/// Handles, in order:
///  1. Anything the common AMDGPU lowering already handles.
///  2. Constant-address-space loads of global variables, rewritten to a
///     REGISTER_LOAD through a dword address.
///  3. Vector loads from local memory, which are scalarized.
///  4. Constant-buffer loads: folded to CONST_ADDRESS per channel when the
///     pointer is constant, otherwise kept as an indexed v4i32 fetch.
///  5. SEXT loads, expanded to EXTLOAD + SIGN_EXTEND_INREG.
///  6. Private-address-space loads via indirect register addressing.
/// Returns SDValue() for anything else so the legalizer can expand it.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  // Let the generic AMDGPU lowering take a first pass.
  if (SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG))
    return Ret;

  // Lower loads constant address space global variable loads
  if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
      isa<GlobalVariable>(GetUnderlyingObject(
          LoadNode->getMemOperand()->getValue(), DAG.getDataLayout()))) {

    // NOTE: this Ptr intentionally shadows the outer Ptr; the register-load
    // path works from the load's base pointer, truncated/extended to the
    // private address space's pointer width.
    SDValue Ptr = DAG.getZExtOrTrunc(
        LoadNode->getBasePtr(), DL,
        getPointerTy(DAG.getDataLayout(), AMDGPUAS::PRIVATE_ADDRESS));
    // Convert the byte address to a dword address (>> 2).
    Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
                      DAG.getConstant(2, DL, MVT::i32));
    return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op->getVTList(),
                       LoadNode->getChain(), Ptr,
                       DAG.getTargetConstant(0, DL, MVT::i32),
                       Op.getOperand(2));
  }

  // Vector loads from local memory are split into scalar loads.
  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      ScalarizeVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Constant-buffer loads (non-extending or zero-extending only).
  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
        isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      // Scalar results are built as a v4i32 and the first lane extracted
      // below; vector results keep their own type/element count.
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT,
                           makeArrayRef(Slots, NumElements));
    } else {
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
                      DAG.getConstant(4, DL, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
          );
    }

    // A scalar load just wants lane 0 of the v4i32 fetch.
    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, DL, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                     LoadNode->getPointerInfo(), MemVT,
                                     LoadNode->isVolatile(),
                                     LoadNode->isNonTemporal(),
                                     LoadNode->isInvariant(),
                                     LoadNode->getAlignment());
    SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
                              DAG.getValueType(MemVT));

    SDValue MergedValues[2] = { Res, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Everything past this point is private-address lowering only.
  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL =
      static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    // Load each element as its own REGISTER_LOAD on the channel/pointer
    // computed by getStackAddress, padding unused lanes with undef.
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, DL, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, DL, MVT::i32),
                             Op.getOperand(2));
    }
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, DL, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2] = {
    LoweredLoad,
    Chain
  };

  return DAG.getMergeValues(Ops, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001615
Matt Arsenault1d555c42014-06-23 18:00:55 +00001616SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1617 SDValue Chain = Op.getOperand(0);
1618 SDValue Cond = Op.getOperand(1);
1619 SDValue Jump = Op.getOperand(2);
1620
1621 return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1622 Chain, Jump, Cond);
1623}
1624
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
///
/// For non-compute shaders each argument is marked live-in in a 128-bit
/// register and copied out. For compute shaders each argument is loaded from
/// the kernel input buffer (CONSTANT_BUFFER_0), starting 36 bytes in.
SDValue R600TargetLowering::LowerFormalArguments(
    SDValue Chain,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    SDLoc DL, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();

  SmallVector<ISD::InputArg, 8> LocalIns;

  // Analysis runs on the original (pre-legalization) argument list so that
  // ArgLocs reflects the in-memory layout of the source-level arguments.
  getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);

  AnalyzeFormalArguments(CCInfo, LocalIns);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    const ISD::InputArg &In = Ins[i];
    EVT VT = In.VT;
    EVT MemVT = VA.getLocVT();
    if (!VT.isVector() && MemVT.isVector()) {
      // Get load source type if scalarized.
      MemVT = MemVT.getVectorElementType();
    }

    // Non-compute (graphics) shaders receive arguments in registers.
    if (MFI->getShaderType() != ShaderType::COMPUTE) {
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    // Compute shaders: arguments live in constant buffer 0.
    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
    // being invalid. Somehow this seems to work with i64 arguments, but breaks
    // for <1 x i64>.

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.
    ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
    if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
      // FIXME: This should really check the extload type, but the handling of
      // extload vector parameters seems to be broken.

      // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
      Ext = ISD::SEXTLOAD;
    }

    // Compute the offset from the value.
    // XXX - I think PartOffset should give you this, but it seems to give the
    // size of the register which isn't useful.

    // ValBase is the buffer offset of the argument's first part; PartOffset -
    // ValBase is therefore this part's offset within the argument.
    unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
    unsigned PartOffset = VA.getLocMemOffset();
    unsigned Offset = 36 + VA.getLocMemOffset();

    MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
    SDValue Arg = DAG.getLoad(ISD::UNINDEXED, Ext, VT, DL, Chain,
                              DAG.getConstant(Offset, DL, MVT::i32),
                              DAG.getUNDEF(MVT::i32),
                              PtrInfo,
                              MemVT, false, true, true, 4);

    // 4 is the preferred alignment for the CONSTANT memory space.
    InVals.push_back(Arg);
    // Record where the ABI argument area ends so later users (presumably
    // implicit-argument lowering) can append after it.
    MFI->ABIArgOffset = Offset + MemVT.getStoreSize();
  }
  return Chain;
}
1704
Mehdi Amini44ede332015-07-09 02:09:04 +00001705EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1706 EVT VT) const {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001707 if (!VT.isVector())
1708 return MVT::i32;
Tom Stellard75aadc22012-12-11 21:25:42 +00001709 return VT.changeVectorElementTypeToInteger();
1710}
1711
Matt Arsenault209a7b92014-04-18 07:40:20 +00001712static SDValue CompactSwizzlableVector(
1713 SelectionDAG &DAG, SDValue VectorEntry,
1714 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001715 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1716 assert(RemapSwizzle.empty());
1717 SDValue NewBldVec[4] = {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001718 VectorEntry.getOperand(0),
1719 VectorEntry.getOperand(1),
1720 VectorEntry.getOperand(2),
1721 VectorEntry.getOperand(3)
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001722 };
1723
1724 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeunefa58a5f2013-10-13 17:56:10 +00001725 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1726 // We mask write here to teach later passes that the ith element of this
1727 // vector is undef. Thus we can use it to reduce 128 bits reg usage,
1728 // break false dependencies and additionnaly make assembly easier to read.
1729 RemapSwizzle[i] = 7; // SEL_MASK_WRITE
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001730 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1731 if (C->isZero()) {
1732 RemapSwizzle[i] = 4; // SEL_0
1733 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1734 } else if (C->isExactlyValue(1.0)) {
1735 RemapSwizzle[i] = 5; // SEL_1
1736 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1737 }
1738 }
1739
1740 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1741 continue;
1742 for (unsigned j = 0; j < i; j++) {
1743 if (NewBldVec[i] == NewBldVec[j]) {
1744 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1745 RemapSwizzle[i] = j;
1746 break;
1747 }
1748 }
1749 }
1750
1751 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001752 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001753}
1754
Benjamin Kramer193960c2013-06-11 13:32:25 +00001755static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1756 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001757 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1758 assert(RemapSwizzle.empty());
1759 SDValue NewBldVec[4] = {
1760 VectorEntry.getOperand(0),
1761 VectorEntry.getOperand(1),
1762 VectorEntry.getOperand(2),
1763 VectorEntry.getOperand(3)
1764 };
1765 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001766 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001767 RemapSwizzle[i] = i;
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001768 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1769 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1770 ->getZExtValue();
1771 if (i == Idx)
1772 isUnmovable[Idx] = true;
1773 }
1774 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001775
1776 for (unsigned i = 0; i < 4; i++) {
1777 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1778 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1779 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001780 if (isUnmovable[Idx])
1781 continue;
1782 // Swap i and Idx
1783 std::swap(NewBldVec[Idx], NewBldVec[i]);
1784 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1785 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001786 }
1787 }
1788
1789 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001790 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001791}
1792
1793
1794SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001795 SDValue Swz[4], SelectionDAG &DAG,
1796 SDLoc DL) const {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001797 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1798 // Old -> New swizzle values
1799 DenseMap<unsigned, unsigned> SwizzleRemap;
1800
1801 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1802 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001803 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001804 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001805 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001806 }
1807
1808 SwizzleRemap.clear();
1809 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1810 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001811 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001812 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001813 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001814 }
1815
1816 return BuildVector;
1817}
1818
1819
Tom Stellard75aadc22012-12-11 21:25:42 +00001820//===----------------------------------------------------------------------===//
1821// Custom DAG Optimizations
1822//===----------------------------------------------------------------------===//
1823
/// Target-specific DAG combines for R600.
///
/// Patterns handled here (anything not matched falls through to the common
/// AMDGPU combiner):
///  - FP_ROUND(UINT_TO_FP f64)            -> UINT_TO_FP f32
///  - FP_TO_SINT(FNEG(SELECT_CC 1.0/0.0)) -> integer SELECT_CC -1/0
///  - INSERT_VECTOR_ELT on BUILD_VECTOR/UNDEF -> rebuilt BUILD_VECTOR
///  - EXTRACT_VECTOR_ELT of BUILD_VECTOR (possibly through a BITCAST)
///  - nested SELECT_CC folds
///  - swizzle optimization of EXPORT / TEXTURE_FETCH operand vectors
SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
  case ISD::FP_ROUND: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
      return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
                         Arg.getOperand(0));
    }
    break;
  }

  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
  // (i32 select_cc f32, f32, -1, 0 cc)
  //
  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
  // this to one of the SET*_DX10 instructions.
  case ISD::FP_TO_SINT: {
    SDValue FNeg = N->getOperand(0);
    if (FNeg.getOpcode() != ISD::FNEG) {
      return SDValue();
    }
    SDValue SelectCC = FNeg.getOperand(0);
    if (SelectCC.getOpcode() != ISD::SELECT_CC ||
        SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
        SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
        !isHWTrueValue(SelectCC.getOperand(2)) ||
        !isHWFalseValue(SelectCC.getOperand(3))) {
      return SDValue();
    }

    SDLoc dl(N);
    return DAG.getNode(ISD::SELECT_CC, dl, N->getValueType(0),
                       SelectCC.getOperand(0), // LHS
                       SelectCC.getOperand(1), // RHS
                       DAG.getConstant(-1, dl, MVT::i32), // True
                       DAG.getConstant(0, dl, MVT::i32), // False
                       SelectCC.getOperand(4)); // CC

    // NOTE(review): this break is unreachable — the return above always
    // fires. Harmless, but could be removed.
    break;
  }

  // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
  // => build_vector elt0, ... , NewEltIdx, ... , eltN
  case ISD::INSERT_VECTOR_ELT: {
    SDValue InVec = N->getOperand(0);
    SDValue InVal = N->getOperand(1);
    SDValue EltNo = N->getOperand(2);
    SDLoc dl(N);

    // If the inserted element is an UNDEF, just use the input vector.
    if (InVal.getOpcode() == ISD::UNDEF)
      return InVec;

    EVT VT = InVec.getValueType();

    // If we can't generate a legal BUILD_VECTOR, exit
    if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
      return SDValue();

    // Check that we know which element is being inserted
    if (!isa<ConstantSDNode>(EltNo))
      return SDValue();
    unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

    // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
    // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
    // vector elements.
    SmallVector<SDValue, 8> Ops;
    if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
      Ops.append(InVec.getNode()->op_begin(),
                 InVec.getNode()->op_end());
    } else if (InVec.getOpcode() == ISD::UNDEF) {
      unsigned NElts = VT.getVectorNumElements();
      Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
    } else {
      return SDValue();
    }

    // Insert the element
    if (Elt < Ops.size()) {
      // All the operands of BUILD_VECTOR must have the same type;
      // we enforce that here.
      EVT OpVT = Ops[0].getValueType();
      if (InVal.getValueType() != OpVT)
        InVal = OpVT.bitsGT(InVal.getValueType()) ?
          DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
          DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
      Ops[Elt] = InVal;
    }

    // Return the new vector
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
  }

  // Extract_vec (Build_vector) generated by custom lowering
  // also needs to be customly combined
  case ISD::EXTRACT_VECTOR_ELT: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return Arg->getOperand(Element);
      }
    }
    // Also look through a bitcast of a BUILD_VECTOR, bitcasting the
    // extracted element instead.
    if (Arg.getOpcode() == ISD::BITCAST &&
        Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
                           Arg->getOperand(0).getOperand(Element));
      }
    }
    break;
  }

  case ISD::SELECT_CC: {
    // Try common optimizations
    SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
    if (Ret.getNode())
      return Ret;

    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
    // selectcc x, y, a, b, inv(cc)
    //
    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
    // selectcc x, y, a, b, cc
    SDValue LHS = N->getOperand(0);
    if (LHS.getOpcode() != ISD::SELECT_CC) {
      return SDValue();
    }

    SDValue RHS = N->getOperand(1);
    SDValue True = N->getOperand(2);
    SDValue False = N->getOperand(3);
    ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();

    // The fold only applies when the outer select's true/false values are
    // exactly the inner select's, and the comparison is against False.
    if (LHS.getOperand(2).getNode() != True.getNode() ||
        LHS.getOperand(3).getNode() != False.getNode() ||
        RHS.getNode() != False.getNode()) {
      return SDValue();
    }

    switch (NCC) {
    default: return SDValue();
    // setne matches the inner select directly.
    case ISD::SETNE: return LHS;
    case ISD::SETEQ: {
      // seteq requires inverting the inner condition code, which is only
      // valid if the inverted code is legal (or we're before legalization).
      ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
      LHSCC = ISD::getSetCCInverse(LHSCC,
                                   LHS.getOperand(0).getValueType().isInteger());
      if (DCI.isBeforeLegalizeOps() ||
          isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
        return DAG.getSelectCC(SDLoc(N),
                               LHS.getOperand(0),
                               LHS.getOperand(1),
                               LHS.getOperand(2),
                               LHS.getOperand(3),
                               LHSCC);
      break;
    }
    }
    return SDValue();
  }

  case AMDGPUISD::EXPORT: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    SDValue NewArgs[8] = {
      N->getOperand(0), // Chain
      SDValue(),        // Filled in by OptimizeSwizzle below
      N->getOperand(2), // ArrayBase
      N->getOperand(3), // Type
      N->getOperand(4), // SWZ_X
      N->getOperand(5), // SWZ_Y
      N->getOperand(6), // SWZ_Z
      N->getOperand(7) // SWZ_W
    };
    SDLoc DL(N);
    // Rewrites the exported vector and the four SWZ_* operands in place.
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
    return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
  }
  case AMDGPUISD::TEXTURE_FETCH: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    SDValue NewArgs[19] = {
      N->getOperand(0),
      N->getOperand(1),
      N->getOperand(2),
      N->getOperand(3),
      N->getOperand(4),
      N->getOperand(5),
      N->getOperand(6),
      N->getOperand(7),
      N->getOperand(8),
      N->getOperand(9),
      N->getOperand(10),
      N->getOperand(11),
      N->getOperand(12),
      N->getOperand(13),
      N->getOperand(14),
      N->getOperand(15),
      N->getOperand(16),
      N->getOperand(17),
      N->getOperand(18),
    };
    SDLoc DL(N);
    // Same as the EXPORT case: the swizzle operands start at index 2 here.
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
    return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
  }
  }

  return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002045
2046static bool
2047FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002048 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Eric Christopherfc6de422014-08-05 02:39:49 +00002049 const R600InstrInfo *TII =
2050 static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002051 if (!Src.isMachineOpcode())
2052 return false;
2053 switch (Src.getMachineOpcode()) {
2054 case AMDGPU::FNEG_R600:
2055 if (!Neg.getNode())
2056 return false;
2057 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002058 Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002059 return true;
2060 case AMDGPU::FABS_R600:
2061 if (!Abs.getNode())
2062 return false;
2063 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002064 Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002065 return true;
2066 case AMDGPU::CONST_COPY: {
2067 unsigned Opcode = ParentNode->getMachineOpcode();
2068 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2069
2070 if (!Sel.getNode())
2071 return false;
2072
2073 SDValue CstOffset = Src.getOperand(0);
2074 if (ParentNode->getValueType(0).isVector())
2075 return false;
2076
2077 // Gather constants values
2078 int SrcIndices[] = {
2079 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2080 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2081 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
2082 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2083 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2084 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2085 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2086 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2087 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2088 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2089 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
2090 };
2091 std::vector<unsigned> Consts;
Matt Arsenault4d64f962014-05-12 19:23:21 +00002092 for (int OtherSrcIdx : SrcIndices) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002093 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2094 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2095 continue;
2096 if (HasDst) {
2097 OtherSrcIdx--;
2098 OtherSelIdx--;
2099 }
2100 if (RegisterSDNode *Reg =
2101 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2102 if (Reg->getReg() == AMDGPU::ALU_CONST) {
Matt Arsenaultb3ee3882014-05-12 19:26:38 +00002103 ConstantSDNode *Cst
2104 = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002105 Consts.push_back(Cst->getZExtValue());
2106 }
2107 }
2108 }
2109
Matt Arsenault37c12d72014-05-12 20:42:57 +00002110 ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002111 Consts.push_back(Cst->getZExtValue());
2112 if (!TII->fitsConstReadLimitations(Consts)) {
2113 return false;
2114 }
2115
2116 Sel = CstOffset;
2117 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
2118 return true;
2119 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002120 case AMDGPU::MOV_IMM_I32:
2121 case AMDGPU::MOV_IMM_F32: {
2122 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
2123 uint64_t ImmValue = 0;
2124
2125
2126 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
2127 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
2128 float FloatValue = FPC->getValueAPF().convertToFloat();
2129 if (FloatValue == 0.0) {
2130 ImmReg = AMDGPU::ZERO;
2131 } else if (FloatValue == 0.5) {
2132 ImmReg = AMDGPU::HALF;
2133 } else if (FloatValue == 1.0) {
2134 ImmReg = AMDGPU::ONE;
2135 } else {
2136 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2137 }
2138 } else {
2139 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
2140 uint64_t Value = C->getZExtValue();
2141 if (Value == 0) {
2142 ImmReg = AMDGPU::ZERO;
2143 } else if (Value == 1) {
2144 ImmReg = AMDGPU::ONE_INT;
2145 } else {
2146 ImmValue = Value;
2147 }
2148 }
2149
2150 // Check that we aren't already using an immediate.
2151 // XXX: It's possible for an instruction to have more than one
2152 // immediate operand, but this is not supported yet.
2153 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
2154 if (!Imm.getNode())
2155 return false;
2156 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
2157 assert(C);
2158 if (C->getZExtValue())
2159 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002160 Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002161 }
2162 Src = DAG.getRegister(ImmReg, MVT::i32);
2163 return true;
2164 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002165 default:
2166 return false;
2167 }
2168}
2169
2170
2171/// \brief Fold the instructions after selecting them
2172SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2173 SelectionDAG &DAG) const {
Eric Christopherfc6de422014-08-05 02:39:49 +00002174 const R600InstrInfo *TII =
2175 static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002176 if (!Node->isMachineOpcode())
2177 return Node;
2178 unsigned Opcode = Node->getMachineOpcode();
2179 SDValue FakeOp;
2180
Benjamin Kramer6cd780f2015-02-17 15:29:18 +00002181 std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002182
2183 if (Opcode == AMDGPU::DOT_4) {
2184 int OperandIdx[] = {
2185 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2186 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2187 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2188 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2189 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2190 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2191 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2192 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00002193 };
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002194 int NegIdx[] = {
2195 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
2196 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
2197 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
2198 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
2199 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
2200 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
2201 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
2202 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
2203 };
2204 int AbsIdx[] = {
2205 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
2206 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
2207 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
2208 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
2209 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
2210 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
2211 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
2212 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
2213 };
2214 for (unsigned i = 0; i < 8; i++) {
2215 if (OperandIdx[i] < 0)
2216 return Node;
2217 SDValue &Src = Ops[OperandIdx[i] - 1];
2218 SDValue &Neg = Ops[NegIdx[i] - 1];
2219 SDValue &Abs = Ops[AbsIdx[i] - 1];
2220 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2221 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2222 if (HasDst)
2223 SelIdx--;
2224 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002225 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2226 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2227 }
2228 } else if (Opcode == AMDGPU::REG_SEQUENCE) {
2229 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2230 SDValue &Src = Ops[i];
2231 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002232 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2233 }
Vincent Lejeune0167a312013-09-12 23:45:00 +00002234 } else if (Opcode == AMDGPU::CLAMP_R600) {
2235 SDValue Src = Node->getOperand(0);
2236 if (!Src.isMachineOpcode() ||
2237 !TII->hasInstrModifiers(Src.getMachineOpcode()))
2238 return Node;
2239 int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
2240 AMDGPU::OpName::clamp);
2241 if (ClampIdx < 0)
2242 return Node;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002243 SDLoc DL(Node);
Benjamin Kramer6cd780f2015-02-17 15:29:18 +00002244 std::vector<SDValue> Ops(Src->op_begin(), Src->op_end());
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002245 Ops[ClampIdx - 1] = DAG.getTargetConstant(1, DL, MVT::i32);
2246 return DAG.getMachineNode(Src.getMachineOpcode(), DL,
2247 Node->getVTList(), Ops);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002248 } else {
2249 if (!TII->hasInstrModifiers(Opcode))
2250 return Node;
2251 int OperandIdx[] = {
2252 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2253 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2254 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
2255 };
2256 int NegIdx[] = {
2257 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
2258 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
2259 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
2260 };
2261 int AbsIdx[] = {
2262 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
2263 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
2264 -1
2265 };
2266 for (unsigned i = 0; i < 3; i++) {
2267 if (OperandIdx[i] < 0)
2268 return Node;
2269 SDValue &Src = Ops[OperandIdx[i] - 1];
2270 SDValue &Neg = Ops[NegIdx[i] - 1];
2271 SDValue FakeAbs;
2272 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2273 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2274 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002275 int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
2276 if (HasDst) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002277 SelIdx--;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002278 ImmIdx--;
2279 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002280 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002281 SDValue &Imm = Ops[ImmIdx];
2282 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002283 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2284 }
2285 }
2286
2287 return Node;
2288}