blob: 18dafa65a81060248916312f6556d9689560c48e [file] [log] [blame]
Tom Stellard75aadc22012-12-11 21:25:42 +00001//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Custom DAG lowering for R600
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000016#include "AMDGPUFrameLowering.h"
Matt Arsenaultc791f392014-06-23 18:00:31 +000017#include "AMDGPUIntrinsicInfo.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000018#include "AMDGPUSubtarget.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000019#include "R600Defines.h"
20#include "R600InstrInfo.h"
21#include "R600MachineFunctionInfo.h"
Tom Stellard067c8152014-07-21 14:01:14 +000022#include "llvm/Analysis/ValueTracking.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000023#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000024#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000025#include "llvm/CodeGen/MachineInstrBuilder.h"
26#include "llvm/CodeGen/MachineRegisterInfo.h"
27#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000028#include "llvm/IR/Argument.h"
29#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000030
31using namespace llvm;
32
Eric Christopher7792e322015-01-30 23:24:40 +000033R600TargetLowering::R600TargetLowering(TargetMachine &TM,
34 const AMDGPUSubtarget &STI)
35 : AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000036 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
37 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
38 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
39 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000040 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
41 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
42
Eric Christopher23a3a7c2015-02-26 00:00:24 +000043 computeRegisterProperties(STI.getRegisterInfo());
Tom Stellard75aadc22012-12-11 21:25:42 +000044
Tom Stellard0351ea22013-09-28 02:50:50 +000045 // Set condition code actions
46 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
47 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000048 setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000049 setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000050 setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
51 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000052 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
53 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
54 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
55 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000056 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
57 setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
58
59 setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
60 setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
61 setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
62 setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
63
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000064 setOperationAction(ISD::FCOS, MVT::f32, Custom);
65 setOperationAction(ISD::FSIN, MVT::f32, Custom);
66
Tom Stellard75aadc22012-12-11 21:25:42 +000067 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000068 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000069
Tom Stellard492ebea2013-03-08 15:37:07 +000070 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
71 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Matt Arsenault1d555c42014-06-23 18:00:55 +000072 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000073
74 setOperationAction(ISD::FSUB, MVT::f32, Expand);
75
76 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
77 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
78 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000079
Tom Stellard75aadc22012-12-11 21:25:42 +000080 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
81 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
82
Tom Stellarde8f9f282013-03-08 15:37:05 +000083 setOperationAction(ISD::SETCC, MVT::i32, Expand);
84 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000085 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
Jan Vesely2cb62ce2014-07-10 22:40:21 +000086 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
87 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000088
Tom Stellard53f2f902013-09-05 18:38:03 +000089 setOperationAction(ISD::SELECT, MVT::i32, Expand);
90 setOperationAction(ISD::SELECT, MVT::f32, Expand);
91 setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
Tom Stellard53f2f902013-09-05 18:38:03 +000092 setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000093
Jan Vesely808fff52015-04-30 17:15:56 +000094 // ADD, SUB overflow.
95 // TODO: turn these into Legal?
96 if (Subtarget->hasCARRY())
97 setOperationAction(ISD::UADDO, MVT::i32, Custom);
98
99 if (Subtarget->hasBORROW())
100 setOperationAction(ISD::USUBO, MVT::i32, Custom);
101
Matt Arsenault4e466652014-04-16 01:41:30 +0000102 // Expand sign extension of vectors
103 if (!Subtarget->hasBFE())
104 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
105
106 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
107 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);
108
109 if (!Subtarget->hasBFE())
110 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
111 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
112 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);
113
114 if (!Subtarget->hasBFE())
115 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
116 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
117 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);
118
119 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
120 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
121 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);
122
123 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
124
125
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000126 // Legalize loads and stores to the private address space.
127 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000128 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000129 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000130
131 // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
132 // spaces, so it is custom lowered to handle those where it isn't.
Ahmed Bougacha2b6917b2015-01-08 00:51:32 +0000133 for (MVT VT : MVT::integer_valuetypes()) {
134 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
135 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
136 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);
Matt Arsenault2a495972014-11-23 02:57:54 +0000137
Ahmed Bougacha2b6917b2015-01-08 00:51:32 +0000138 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
139 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
140 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);
Matt Arsenault2a495972014-11-23 02:57:54 +0000141
Ahmed Bougacha2b6917b2015-01-08 00:51:32 +0000142 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
143 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
144 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
145 }
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000146
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000147 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000148 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000149 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000150 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
Tom Stellardd3ee8c12013-08-16 01:12:06 +0000151 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
152 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000153
Tom Stellard365366f2013-01-23 02:09:06 +0000154 setOperationAction(ISD::LOAD, MVT::i32, Custom);
155 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000156 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
157
Tom Stellard880a80a2014-06-17 16:53:14 +0000158 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
159 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
160 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
161 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
162
163 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
164 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
165 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
166 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
167
Tom Stellard75aadc22012-12-11 21:25:42 +0000168 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +0000169 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +0000170 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +0000171 setTargetDAGCombine(ISD::SELECT_CC);
Quentin Colombete2e05482013-07-30 00:27:16 +0000172 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000173
Jan Vesely25f36272014-06-18 12:27:13 +0000174 // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
175 // to be Legal/Custom in order to avoid library calls.
176 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
Jan Vesely900ff2e2014-06-18 12:27:15 +0000177 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
Jan Veselyecf51332014-06-18 12:27:17 +0000178 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
Jan Vesely25f36272014-06-18 12:27:13 +0000179
Michel Danzer49812b52013-07-10 16:37:07 +0000180 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
181
Matt Arsenaultc4d3d3a2014-06-23 18:00:49 +0000182 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
183 for (MVT VT : ScalarIntVTs) {
184 setOperationAction(ISD::ADDC, VT, Expand);
185 setOperationAction(ISD::SUBC, VT, Expand);
186 setOperationAction(ISD::ADDE, VT, Expand);
187 setOperationAction(ISD::SUBE, VT, Expand);
188 }
189
Tom Stellardfc455472013-08-12 22:33:21 +0000190 setSchedulingPreference(Sched::Source);
Tom Stellard75aadc22012-12-11 21:25:42 +0000191}
192
/// Expand pseudo-instructions that need custom MachineInstr-level insertion.
///
/// Invoked by the generic instruction selector for instructions marked
/// usesCustomInserter.  Each handled pseudo is replaced in-place in \p BB by
/// one or more real R600 machine instructions; the original \p MI is erased
/// at the end unless a case returns early.  Returns the (possibly unchanged)
/// basic block the remaining instructions live in.
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(Subtarget->getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // Replace LDS_*_RET instruction that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI->getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
      // LDS_1A2D support and remove this special case.
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()) ||
          MI->getOpcode() == AMDGPU::LDS_CMPST_RET)
        return BB;

      // Build the NORET form with the same operands, minus the dead dst.
      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  // CLAMP/FABS/FNEG pseudos become a plain MOV carrying the matching
  // source/dest modifier flag.
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    // The write mask is applied to the instruction that defines the masked
    // register, not to MASK_WRITE itself.
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    // Materialize the f32 immediate via its raw bit pattern.
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    // Copy from the constant cache: MOV from ALU_CONST with the constant
    // selector encoded in the src0_sel operand.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
        MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    // If the next instruction is a RETURN, fold the end-of-program bit into
    // this write.
    unsigned EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    // Texture sample with explicit derivatives: load the H and V gradients
    // into temporaries, then issue the gradient sample that implicitly
    // consumes them.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    // Adjust source swizzle and coordinate-type bits per texture target.
    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
        .addOperand(MI->getOperand(3))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
        .addOperand(MI->getOperand(2))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW)
        .addReg(T0, RegState::Implicit)   // gradients consumed implicitly
        .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    // Same expansion as TXD, but the final sample is the shadow-compare
    // variant (TEX_SAMPLE_C_G).
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    // Adjust source swizzle and coordinate-type bits per texture target.
    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
        .addOperand(MI->getOperand(3))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
        .addOperand(MI->getOperand(2))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW)
        .addReg(T0, RegState::Implicit)   // gradients consumed implicitly
        .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    // Unconditional branch becomes a plain JUMP.
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
        .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    // Predicate on the f32 condition (PRED_X with float is-not-zero), then
    // emit the predicated jump.  MO_FLAG_PUSH pushes the predicate onto the
    // control-flow stack.
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .addOperand(MI->getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    // Same as the f32 case, but with the integer is-not-zero compare.
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO_INT)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .addOperand(MI->getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if its not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    // Scan forward for a later export of the same type in this block.
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // CF instruction encoding differs between Evergreen (84) and R600 (40).
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addOperand(MI->getOperand(2))
        .addOperand(MI->getOperand(3))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5))
        .addOperand(MI->getOperand(6))
        .addImm(CfInst)
        .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  // The pseudo has been fully replaced; remove it.
  MI->eraseFromParent();
  return BB;
}
572
573//===----------------------------------------------------------------------===//
574// Custom DAG Lowering Operations
575//===----------------------------------------------------------------------===//
576
Tom Stellard75aadc22012-12-11 21:25:42 +0000577SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000578 MachineFunction &MF = DAG.getMachineFunction();
579 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard75aadc22012-12-11 21:25:42 +0000580 switch (Op.getOpcode()) {
581 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Tom Stellard880a80a2014-06-17 16:53:14 +0000582 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
583 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
Jan Vesely25f36272014-06-18 12:27:13 +0000584 case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
Jan Veselyecf51332014-06-18 12:27:17 +0000585 case ISD::SRA_PARTS:
Jan Vesely900ff2e2014-06-18 12:27:15 +0000586 case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
Jan Vesely808fff52015-04-30 17:15:56 +0000587 case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
588 case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000589 case ISD::FCOS:
590 case ISD::FSIN: return LowerTrig(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000591 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000592 case ISD::STORE: return LowerSTORE(Op, DAG);
Matt Arsenaultd2c9e082014-07-07 18:34:45 +0000593 case ISD::LOAD: {
594 SDValue Result = LowerLOAD(Op, DAG);
595 assert((!Result.getNode() ||
596 Result.getNode()->getNumValues() == 2) &&
597 "Load should return a value and a chain");
598 return Result;
599 }
600
Matt Arsenault1d555c42014-06-23 18:00:55 +0000601 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000602 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000603 case ISD::INTRINSIC_VOID: {
604 SDValue Chain = Op.getOperand(0);
605 unsigned IntrinsicID =
606 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
607 switch (IntrinsicID) {
608 case AMDGPUIntrinsic::AMDGPU_store_output: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000609 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
610 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000611 MFI->LiveOuts.push_back(Reg);
Andrew Trickef9de2a2013-05-25 02:42:55 +0000612 return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
Tom Stellard75aadc22012-12-11 21:25:42 +0000613 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000614 case AMDGPUIntrinsic::R600_store_swizzle: {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000615 SDLoc DL(Op);
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000616 const SDValue Args[8] = {
617 Chain,
618 Op.getOperand(2), // Export Value
619 Op.getOperand(3), // ArrayBase
620 Op.getOperand(4), // Type
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000621 DAG.getConstant(0, DL, MVT::i32), // SWZ_X
622 DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
623 DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
624 DAG.getConstant(3, DL, MVT::i32) // SWZ_W
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000625 };
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000626 return DAG.getNode(AMDGPUISD::EXPORT, DL, Op.getValueType(), Args);
Tom Stellard75aadc22012-12-11 21:25:42 +0000627 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000628
Tom Stellard75aadc22012-12-11 21:25:42 +0000629 // default for switch(IntrinsicID)
630 default: break;
631 }
632 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
633 break;
634 }
635 case ISD::INTRINSIC_WO_CHAIN: {
636 unsigned IntrinsicID =
637 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
638 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000639 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000640 switch(IntrinsicID) {
641 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000642 case AMDGPUIntrinsic::R600_load_input: {
643 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
644 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
645 MachineFunction &MF = DAG.getMachineFunction();
646 MachineRegisterInfo &MRI = MF.getRegInfo();
647 MRI.addLiveIn(Reg);
648 return DAG.getCopyFromReg(DAG.getEntryNode(),
649 SDLoc(DAG.getEntryNode()), Reg, VT);
650 }
651
652 case AMDGPUIntrinsic::R600_interp_input: {
653 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
654 int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
655 MachineSDNode *interp;
656 if (ijb < 0) {
Eric Christopher7792e322015-01-30 23:24:40 +0000657 const R600InstrInfo *TII =
658 static_cast<const R600InstrInfo *>(Subtarget->getInstrInfo());
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000659 interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000660 MVT::v4f32, DAG.getTargetConstant(slot / 4, DL, MVT::i32));
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000661 return DAG.getTargetExtractSubreg(
662 TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
663 DL, MVT::f32, SDValue(interp, 0));
664 }
665 MachineFunction &MF = DAG.getMachineFunction();
666 MachineRegisterInfo &MRI = MF.getRegInfo();
667 unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
668 unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
669 MRI.addLiveIn(RegisterI);
670 MRI.addLiveIn(RegisterJ);
671 SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
672 SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
673 SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
674 SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);
675
676 if (slot % 4 < 2)
677 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000678 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4, DL, MVT::i32),
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000679 RegisterJNode, RegisterINode);
680 else
681 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000682 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4, DL, MVT::i32),
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000683 RegisterJNode, RegisterINode);
684 return SDValue(interp, slot % 2);
685 }
Vincent Lejeunef143af32013-11-11 22:10:24 +0000686 case AMDGPUIntrinsic::R600_interp_xy:
687 case AMDGPUIntrinsic::R600_interp_zw: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000688 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Tom Stellard41afe6a2013-02-05 17:09:14 +0000689 MachineSDNode *interp;
Vincent Lejeunef143af32013-11-11 22:10:24 +0000690 SDValue RegisterINode = Op.getOperand(2);
691 SDValue RegisterJNode = Op.getOperand(3);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000692
Vincent Lejeunef143af32013-11-11 22:10:24 +0000693 if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
Tom Stellard41afe6a2013-02-05 17:09:14 +0000694 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000695 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000696 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000697 else
698 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000699 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000700 RegisterJNode, RegisterINode);
Vincent Lejeunef143af32013-11-11 22:10:24 +0000701 return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
702 SDValue(interp, 0), SDValue(interp, 1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000703 }
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000704 case AMDGPUIntrinsic::R600_tex:
705 case AMDGPUIntrinsic::R600_texc:
706 case AMDGPUIntrinsic::R600_txl:
707 case AMDGPUIntrinsic::R600_txlc:
708 case AMDGPUIntrinsic::R600_txb:
709 case AMDGPUIntrinsic::R600_txbc:
710 case AMDGPUIntrinsic::R600_txf:
711 case AMDGPUIntrinsic::R600_txq:
712 case AMDGPUIntrinsic::R600_ddx:
Vincent Lejeune6df39432013-10-02 16:00:33 +0000713 case AMDGPUIntrinsic::R600_ddy:
714 case AMDGPUIntrinsic::R600_ldptr: {
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000715 unsigned TextureOp;
716 switch (IntrinsicID) {
717 case AMDGPUIntrinsic::R600_tex:
718 TextureOp = 0;
719 break;
720 case AMDGPUIntrinsic::R600_texc:
721 TextureOp = 1;
722 break;
723 case AMDGPUIntrinsic::R600_txl:
724 TextureOp = 2;
725 break;
726 case AMDGPUIntrinsic::R600_txlc:
727 TextureOp = 3;
728 break;
729 case AMDGPUIntrinsic::R600_txb:
730 TextureOp = 4;
731 break;
732 case AMDGPUIntrinsic::R600_txbc:
733 TextureOp = 5;
734 break;
735 case AMDGPUIntrinsic::R600_txf:
736 TextureOp = 6;
737 break;
738 case AMDGPUIntrinsic::R600_txq:
739 TextureOp = 7;
740 break;
741 case AMDGPUIntrinsic::R600_ddx:
742 TextureOp = 8;
743 break;
744 case AMDGPUIntrinsic::R600_ddy:
745 TextureOp = 9;
746 break;
Vincent Lejeune6df39432013-10-02 16:00:33 +0000747 case AMDGPUIntrinsic::R600_ldptr:
748 TextureOp = 10;
749 break;
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000750 default:
751 llvm_unreachable("Unknow Texture Operation");
752 }
753
754 SDValue TexArgs[19] = {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000755 DAG.getConstant(TextureOp, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000756 Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000757 DAG.getConstant(0, DL, MVT::i32),
758 DAG.getConstant(1, DL, MVT::i32),
759 DAG.getConstant(2, DL, MVT::i32),
760 DAG.getConstant(3, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000761 Op.getOperand(2),
762 Op.getOperand(3),
763 Op.getOperand(4),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000764 DAG.getConstant(0, DL, MVT::i32),
765 DAG.getConstant(1, DL, MVT::i32),
766 DAG.getConstant(2, DL, MVT::i32),
767 DAG.getConstant(3, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000768 Op.getOperand(5),
769 Op.getOperand(6),
770 Op.getOperand(7),
771 Op.getOperand(8),
772 Op.getOperand(9),
773 Op.getOperand(10)
774 };
Craig Topper48d114b2014-04-26 18:35:24 +0000775 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000776 }
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000777 case AMDGPUIntrinsic::AMDGPU_dp4: {
778 SDValue Args[8] = {
779 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000780 DAG.getConstant(0, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000781 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000782 DAG.getConstant(0, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000783 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000784 DAG.getConstant(1, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000785 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000786 DAG.getConstant(1, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000787 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000788 DAG.getConstant(2, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000789 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000790 DAG.getConstant(2, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000791 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000792 DAG.getConstant(3, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000793 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000794 DAG.getConstant(3, DL, MVT::i32))
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000795 };
Craig Topper48d114b2014-04-26 18:35:24 +0000796 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000797 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000798
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000799 case Intrinsic::r600_read_ngroups_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000800 return LowerImplicitParameter(DAG, VT, DL, 0);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000801 case Intrinsic::r600_read_ngroups_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000802 return LowerImplicitParameter(DAG, VT, DL, 1);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000803 case Intrinsic::r600_read_ngroups_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000804 return LowerImplicitParameter(DAG, VT, DL, 2);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000805 case Intrinsic::r600_read_global_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000806 return LowerImplicitParameter(DAG, VT, DL, 3);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000807 case Intrinsic::r600_read_global_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000808 return LowerImplicitParameter(DAG, VT, DL, 4);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000809 case Intrinsic::r600_read_global_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000810 return LowerImplicitParameter(DAG, VT, DL, 5);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000811 case Intrinsic::r600_read_local_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000812 return LowerImplicitParameter(DAG, VT, DL, 6);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000813 case Intrinsic::r600_read_local_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000814 return LowerImplicitParameter(DAG, VT, DL, 7);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000815 case Intrinsic::r600_read_local_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000816 return LowerImplicitParameter(DAG, VT, DL, 8);
817
Jan Veselye5121f32014-10-14 20:05:26 +0000818 case Intrinsic::AMDGPU_read_workdim:
819 return LowerImplicitParameter(DAG, VT, DL, MFI->ABIArgOffset / 4);
820
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000821 case Intrinsic::r600_read_tgid_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000822 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
823 AMDGPU::T1_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000824 case Intrinsic::r600_read_tgid_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000825 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
826 AMDGPU::T1_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000827 case Intrinsic::r600_read_tgid_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000828 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
829 AMDGPU::T1_Z, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000830 case Intrinsic::r600_read_tidig_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000831 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
832 AMDGPU::T0_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000833 case Intrinsic::r600_read_tidig_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000834 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
835 AMDGPU::T0_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000836 case Intrinsic::r600_read_tidig_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000837 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
838 AMDGPU::T0_Z, VT);
Matt Arsenault257d48d2014-06-24 22:13:39 +0000839 case Intrinsic::AMDGPU_rsq:
840 // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
841 return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
Marek Olsak43650e42015-03-24 13:40:08 +0000842
843 case AMDGPUIntrinsic::AMDGPU_fract:
844 case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
845 return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000846 }
847 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
848 break;
849 }
850 } // end switch(Op.getOpcode())
851 return SDValue();
852}
853
/// \brief Replace nodes whose results are illegal with legal equivalents.
///
/// Custom cases handled here:
///  - FP_TO_UINT producing i1 is lowered via LowerFPTOUINT (a SETCC vs 0.0).
///  - All other FP_TO_UINT widths fall through to the FP_TO_SINT expansion,
///    since out-of-range conversions need not be preserved.
///  - SDIVREM / UDIVREM are expanded by the AMDGPU division helpers.
/// Everything else is delegated to the AMDGPU base-class implementation.
void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
    return;
  case ISD::FP_TO_UINT:
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
      return;
    }
    // Fall-through. Since we don't care about out of bounds values
    // we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint
    // considers some extra cases which are not necessary here.
  case ISD::FP_TO_SINT: {
    SDValue Result;
    if (expandFP_TO_SINT(N, Result, DAG))
      Results.push_back(Result);
    return;
  }
  case ISD::SDIVREM: {
    // NOTE(review): result index 1 is used to build the SDValue, but the
    // underlying node is the same either way — presumably LowerSDIVREM only
    // inspects the node; confirm against its definition.
    SDValue Op = SDValue(N, 1);
    SDValue RES = LowerSDIVREM(Op, DAG);
    // Push quotient (result 0) then remainder (result 1).
    Results.push_back(RES);
    Results.push_back(RES.getValue(1));
    break;
  }
  case ISD::UDIVREM: {
    SDValue Op = SDValue(N, 0);
    // LowerUDIVREM64 fills Results with the expanded quotient/remainder.
    LowerUDIVREM64(Op, DAG, Results);
    break;
  }
  }
}
889
Tom Stellard880a80a2014-06-17 16:53:14 +0000890SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
891 SDValue Vector) const {
892
893 SDLoc DL(Vector);
894 EVT VecVT = Vector.getValueType();
895 EVT EltVT = VecVT.getVectorElementType();
896 SmallVector<SDValue, 8> Args;
897
898 for (unsigned i = 0, e = VecVT.getVectorNumElements();
899 i != e; ++i) {
Mehdi Amini44ede332015-07-09 02:09:04 +0000900 Args.push_back(DAG.getNode(
901 ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
902 DAG.getConstant(i, DL, getVectorIdxTy(DAG.getDataLayout()))));
Tom Stellard880a80a2014-06-17 16:53:14 +0000903 }
904
905 return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
906}
907
908SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
909 SelectionDAG &DAG) const {
910
911 SDLoc DL(Op);
912 SDValue Vector = Op.getOperand(0);
913 SDValue Index = Op.getOperand(1);
914
915 if (isa<ConstantSDNode>(Index) ||
916 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
917 return Op;
918
919 Vector = vectorToVerticalVector(DAG, Vector);
920 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
921 Vector, Index);
922}
923
924SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
925 SelectionDAG &DAG) const {
926 SDLoc DL(Op);
927 SDValue Vector = Op.getOperand(0);
928 SDValue Value = Op.getOperand(1);
929 SDValue Index = Op.getOperand(2);
930
931 if (isa<ConstantSDNode>(Index) ||
932 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
933 return Op;
934
935 Vector = vectorToVerticalVector(DAG, Vector);
936 SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
937 Vector, Value, Index);
938 return vectorToVerticalVector(DAG, Insert);
939}
940
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000941SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
942 // On hw >= R700, COS/SIN input must be between -1. and 1.
943 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
944 EVT VT = Op.getValueType();
945 SDValue Arg = Op.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000946 SDLoc DL(Op);
947 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
948 DAG.getNode(ISD::FADD, DL, VT,
949 DAG.getNode(ISD::FMUL, DL, VT, Arg,
950 DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
951 DAG.getConstantFP(0.5, DL, MVT::f32)));
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000952 unsigned TrigNode;
953 switch (Op.getOpcode()) {
954 case ISD::FCOS:
955 TrigNode = AMDGPUISD::COS_HW;
956 break;
957 case ISD::FSIN:
958 TrigNode = AMDGPUISD::SIN_HW;
959 break;
960 default:
961 llvm_unreachable("Wrong trig opcode");
962 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000963 SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
964 DAG.getNode(ISD::FADD, DL, VT, FractPart,
965 DAG.getConstantFP(-0.5, DL, MVT::f32)));
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000966 if (Gen >= AMDGPUSubtarget::R700)
967 return TrigVal;
968 // On R600 hw, COS/SIN input must be between -Pi and Pi.
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000969 return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
970 DAG.getConstantFP(3.14159265359, DL, MVT::f32));
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000971}
972
Jan Vesely25f36272014-06-18 12:27:13 +0000973SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
974 SDLoc DL(Op);
975 EVT VT = Op.getValueType();
976
977 SDValue Lo = Op.getOperand(0);
978 SDValue Hi = Op.getOperand(1);
979 SDValue Shift = Op.getOperand(2);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000980 SDValue Zero = DAG.getConstant(0, DL, VT);
981 SDValue One = DAG.getConstant(1, DL, VT);
Jan Vesely25f36272014-06-18 12:27:13 +0000982
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000983 SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
984 SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
Jan Vesely25f36272014-06-18 12:27:13 +0000985 SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
986 SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
987
988 // The dance around Width1 is necessary for 0 special case.
989 // Without it the CompShift might be 32, producing incorrect results in
990 // Overflow. So we do the shift in two steps, the alternative is to
991 // add a conditional to filter the special case.
992
993 SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
994 Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);
995
996 SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
997 HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
998 SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);
999
1000 SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
1001 SDValue LoBig = Zero;
1002
1003 Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
1004 Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
1005
1006 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
1007}
1008
Jan Vesely900ff2e2014-06-18 12:27:15 +00001009SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
1010 SDLoc DL(Op);
1011 EVT VT = Op.getValueType();
1012
1013 SDValue Lo = Op.getOperand(0);
1014 SDValue Hi = Op.getOperand(1);
1015 SDValue Shift = Op.getOperand(2);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001016 SDValue Zero = DAG.getConstant(0, DL, VT);
1017 SDValue One = DAG.getConstant(1, DL, VT);
Jan Vesely900ff2e2014-06-18 12:27:15 +00001018
Jan Veselyecf51332014-06-18 12:27:17 +00001019 const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;
1020
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001021 SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
1022 SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
Jan Vesely900ff2e2014-06-18 12:27:15 +00001023 SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
1024 SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
1025
1026 // The dance around Width1 is necessary for 0 special case.
1027 // Without it the CompShift might be 32, producing incorrect results in
1028 // Overflow. So we do the shift in two steps, the alternative is to
1029 // add a conditional to filter the special case.
1030
1031 SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
1032 Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);
1033
Jan Veselyecf51332014-06-18 12:27:17 +00001034 SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
Jan Vesely900ff2e2014-06-18 12:27:15 +00001035 SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
1036 LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);
1037
Jan Veselyecf51332014-06-18 12:27:17 +00001038 SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
1039 SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;
Jan Vesely900ff2e2014-06-18 12:27:15 +00001040
1041 Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
1042 Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
1043
1044 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
1045}
1046
Jan Vesely808fff52015-04-30 17:15:56 +00001047SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
1048 unsigned mainop, unsigned ovf) const {
1049 SDLoc DL(Op);
1050 EVT VT = Op.getValueType();
1051
1052 SDValue Lo = Op.getOperand(0);
1053 SDValue Hi = Op.getOperand(1);
1054
1055 SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
1056 // Extend sign.
1057 OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
1058 DAG.getValueType(MVT::i1));
1059
1060 SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);
1061
1062 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
1063}
1064
Tom Stellard75aadc22012-12-11 21:25:42 +00001065SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001066 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001067 return DAG.getNode(
1068 ISD::SETCC,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001069 DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001070 MVT::i1,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001071 Op, DAG.getConstantFP(0.0f, DL, MVT::f32),
Tom Stellard75aadc22012-12-11 21:25:42 +00001072 DAG.getCondCode(ISD::SETNE)
1073 );
1074}
1075
Tom Stellard75aadc22012-12-11 21:25:42 +00001076SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001077 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001078 unsigned DwordOffset) const {
1079 unsigned ByteOffset = DwordOffset * 4;
1080 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +00001081 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +00001082
1083 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
1084 assert(isInt<16>(ByteOffset));
1085
1086 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001087 DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
Tom Stellard75aadc22012-12-11 21:25:42 +00001088 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
1089 false, false, false, 0);
1090}
1091
Tom Stellard75aadc22012-12-11 21:25:42 +00001092bool R600TargetLowering::isZero(SDValue Op) const {
1093 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
1094 return Cst->isNullValue();
1095 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
1096 return CstFP->isZero();
1097 } else {
1098 return false;
1099 }
1100}
1101
/// Custom lowering for SELECT_CC (LHS, RHS, True, False, CC).
///
/// Strategy, tried in order:
///  1. f32 selects that encode a min/max are combined to legacy FMIN/FMAX.
///  2. If True/False are the hardware boolean values (possibly after swapping
///     them under an inverted/swapped condition), keep the SELECT_CC so a
///     SET* instruction can match it.
///  3. If one comparison operand is zero (moved to the RHS when legal), emit
///     a SELECT_CC that a CND* instruction can match.
///  4. Otherwise materialize the condition with one SELECT_CC producing the
///     hardware boolean, and select on that with a second SELECT_CC.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // Step 1: fold f32 min/max patterns into the legacy min/max nodes.
  if (VT == MVT::f32) {
    DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
    SDValue MinMax = CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
    if (MinMax)
      return MinMax;
  }

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand: if the operands
  // arrived inverted (False is the HW true value), invert the condition —
  // or invert-and-swap — whenever the resulting condition code is legal.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
      ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    // Note: CC may have been rewritten above, so re-read the condition code.
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        // Inverting the condition also requires swapping True/False.
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    // Not-equal conditions are rewritten as their (equal-style) inverse with
    // the select arms exchanged.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, DL, CompareVT);
    HWFalse = DAG.getConstant(0, DL, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
1243
Alp Tokercb402912014-01-24 17:20:08 +00001244/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001245/// convert these pointers to a register index. Each register holds
1246/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
1247/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
1248/// for indirect addressing.
1249SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1250 unsigned StackWidth,
1251 SelectionDAG &DAG) const {
1252 unsigned SRLPad;
1253 switch(StackWidth) {
1254 case 1:
1255 SRLPad = 2;
1256 break;
1257 case 2:
1258 SRLPad = 3;
1259 break;
1260 case 4:
1261 SRLPad = 4;
1262 break;
1263 default: llvm_unreachable("Invalid stack width");
1264 }
1265
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001266 SDLoc DL(Ptr);
1267 return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
1268 DAG.getConstant(SRLPad, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001269}
1270
/// Compute the sub-register channel and pointer increment used to address
/// element \p ElemIdx of a stack value, given how many of the 4 sub-registers
/// (\p StackWidth) are used for indirect addressing.
///
/// \p PtrIncr is an increment relative to the previous element's address, not
/// an absolute offset: the caller iterates elements in order and accumulates
/// PtrIncr into the pointer once per element (see the indirect-addressing
/// loop in LowerSTORE).
void R600TargetLowering::getStackAddress(unsigned StackWidth,
                                         unsigned ElemIdx,
                                         unsigned &Channel,
                                         unsigned &PtrIncr) const {
  switch (StackWidth) {
  default:
  case 1:
    // One channel per register: every element after the first advances the
    // register index by one.
    Channel = 0;
    if (ElemIdx > 0) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 2:
    // Two channels per register: elements 0,1 share the first register, and
    // the single bump happens when crossing from element 1 to element 2.
    Channel = ElemIdx % 2;
    if (ElemIdx == 2) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 4:
    // Four channels per register: all elements fit in one register.
    Channel = ElemIdx;
    PtrIncr = 0;
    break;
  }
}
1299
/// Custom lowering for STORE nodes.
///
/// Handles three situations after first giving the AMDGPU base class a shot:
///  - global-address truncating i8/i16 stores become a STORE_MSKOR node
///    carrying a byte-shifted value and write mask (read-modify-write style);
///  - other global stores of >= 32-bit values get their byte pointer
///    rewritten to a dword address;
///  - private (stack) stores become REGISTER_STORE nodes addressed through
///    the indirect-addressing register file.
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Ptr = Op.getOperand(2);

  // Let the generic AMDGPU lowering handle the store if it can.
  SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
  if (Result.getNode()) {
    return Result;
  }

  if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
    if (StoreNode->isTruncatingStore()) {
      EVT VT = Value.getValueType();
      assert(VT.bitsLE(MVT::i32));
      EVT MemVT = StoreNode->getMemoryVT();
      // Select the mask covering the stored sub-dword (i8 or i16 only).
      SDValue MaskConstant;
      if (MemVT == MVT::i8) {
        MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
      } else {
        assert(MemVT == MVT::i16);
        MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
      }
      // Split the byte pointer into a dword address and a byte index, then
      // position the truncated value and its mask within the dword.
      SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
                                      DAG.getConstant(2, DL, MVT::i32));
      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
                                      DAG.getConstant(0x00000003, DL, VT));
      SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
      SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
                                  DAG.getConstant(3, DL, VT));
      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
      // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
      // vector instead.
      SDValue Src[4] = {
        ShiftedValue,
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(0, DL, MVT::i32),
        Mask
      };
      SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src);
      SDValue Args[3] = { Chain, Input, DWordAddr };
      return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
                                     Op->getVTList(), Args, MemVT,
                                     StoreNode->getMemOperand());
    } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
               Value.getValueType().bitsGE(MVT::i32)) {
      // Convert pointer from byte address to dword address.
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
                        DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
                                    Ptr, DAG.getConstant(2, DL, MVT::i32)));

      if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
        llvm_unreachable("Truncated and indexed stores not supported yet");
      } else {
        Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
      }
      return Chain;
    }
  }

  EVT ValueVT = Value.getValueType();

  // Only private (stack) stores are handled below.
  if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // NOTE(review): this is a second call to the base-class LowerSTORE with the
  // same arguments as the one above; it appears redundant — confirm and
  // consider removing.
  SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
  if (Ret.getNode()) {
    return Ret;
  }
  // Lowering for indirect addressing

  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL =
      static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  // Turn the byte pointer into a register index (see stackPtrToRegIndex).
  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (ValueVT.isVector()) {
    unsigned NumElemVT = ValueVT.getVectorNumElements();
    EVT ElemVT = ValueVT.getVectorElementType();
    SmallVector<SDValue, 4> Stores(NumElemVT);

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    // Emit one REGISTER_STORE per element; PtrIncr from getStackAddress is
    // relative, so the pointer is advanced incrementally across iterations.
    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, DL, MVT::i32));
      SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
                                 Value, DAG.getConstant(i, DL, MVT::i32));

      Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
                              Chain, Elem, Ptr,
                              DAG.getTargetConstant(Channel, DL, MVT::i32));
    }
    // Join the per-element stores into a single chain.
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
  } else {
    // Scalar store: widen i8 values to i32 before storing to channel 0.
    if (ValueVT == MVT::i8) {
      Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
    }
    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
                        DAG.getTargetConstant(0, DL, MVT::i32)); // Channel
  }

  return Chain;
}
1412
Tom Stellard365366f2013-01-23 02:09:06 +00001413// return (512 + (kc_bank << 12)
1414static int
1415ConstantAddressBlock(unsigned AddressSpace) {
1416 switch (AddressSpace) {
1417 case AMDGPUAS::CONSTANT_BUFFER_0:
1418 return 512;
1419 case AMDGPUAS::CONSTANT_BUFFER_1:
1420 return 512 + 4096;
1421 case AMDGPUAS::CONSTANT_BUFFER_2:
1422 return 512 + 4096 * 2;
1423 case AMDGPUAS::CONSTANT_BUFFER_3:
1424 return 512 + 4096 * 3;
1425 case AMDGPUAS::CONSTANT_BUFFER_4:
1426 return 512 + 4096 * 4;
1427 case AMDGPUAS::CONSTANT_BUFFER_5:
1428 return 512 + 4096 * 5;
1429 case AMDGPUAS::CONSTANT_BUFFER_6:
1430 return 512 + 4096 * 6;
1431 case AMDGPUAS::CONSTANT_BUFFER_7:
1432 return 512 + 4096 * 7;
1433 case AMDGPUAS::CONSTANT_BUFFER_8:
1434 return 512 + 4096 * 8;
1435 case AMDGPUAS::CONSTANT_BUFFER_9:
1436 return 512 + 4096 * 9;
1437 case AMDGPUAS::CONSTANT_BUFFER_10:
1438 return 512 + 4096 * 10;
1439 case AMDGPUAS::CONSTANT_BUFFER_11:
1440 return 512 + 4096 * 11;
1441 case AMDGPUAS::CONSTANT_BUFFER_12:
1442 return 512 + 4096 * 12;
1443 case AMDGPUAS::CONSTANT_BUFFER_13:
1444 return 512 + 4096 * 13;
1445 case AMDGPUAS::CONSTANT_BUFFER_14:
1446 return 512 + 4096 * 14;
1447 case AMDGPUAS::CONSTANT_BUFFER_15:
1448 return 512 + 4096 * 15;
1449 default:
1450 return -1;
1451 }
1452}
1453
// Custom lowering for ISD::LOAD on R600. Dispatches on the load's address
// space and extension type; returns SDValue() only for non-private address
// spaces that the generic legalizer should handle.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  // Give the generic AMDGPU lowering the first chance at this load.
  if (SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG))
    return Ret;

  // Lower constant address space loads of global variables to an indirect
  // register load (REGISTER_LOAD) of the dword-addressed pointer.
  if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
      isa<GlobalVariable>(GetUnderlyingObject(
          LoadNode->getMemOperand()->getValue(), DAG.getDataLayout()))) {

    SDValue Ptr = DAG.getZExtOrTrunc(
        LoadNode->getBasePtr(), DL,
        getPointerTy(DAG.getDataLayout(), AMDGPUAS::PRIVATE_ADDRESS));
    // Convert the byte address to a dword address (divide by 4).
    Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
                      DAG.getConstant(2, DL, MVT::i32));
    return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op->getVTList(),
                       LoadNode->getChain(), Ptr,
                       DAG.getTargetConstant(0, DL, MVT::i32),
                       Op.getOperand(2));
  }

  // Vector loads from local memory are split into per-element loads.
  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      ScalarizeVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Loads from one of the constant buffers become CONST_ADDRESS nodes.
  // Only non-extending and zero-extending loads are handled this way.
  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
        isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      // Build a vector of the requested width (4 elements for scalar loads;
      // the scalar result is extracted below).
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT,
                           makeArrayRef(Slots, NumElements));
    } else {
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
                      DAG.getConstant(4, DL, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
          );
    }

    // Scalar loads read element 0 of the 4-wide constant fetch.
    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, DL, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    // Expand SEXTLOAD as EXTLOAD + SIGN_EXTEND_INREG.
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                     LoadNode->getPointerInfo(), MemVT,
                                     LoadNode->isVolatile(),
                                     LoadNode->isNonTemporal(),
                                     LoadNode->isInvariant(),
                                     LoadNode->getAlignment());
    SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
                              DAG.getValueType(MemVT));

    SDValue MergedValues[2] = { Res, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Everything below handles only the private address space (stack).
  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL =
      static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    // One REGISTER_LOAD per element; getStackAddress maps the element index
    // to a (channel, pointer increment) pair for the configured stack width.
    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, DL, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, DL, MVT::i32),
                             Op.getOperand(2));
    }
    // Pad the result out to a full 4-element vector with undefs.
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, DL, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2] = {
    LoweredLoad,
    Chain
  };

  return DAG.getMergeValues(Ops, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001611
Matt Arsenault1d555c42014-06-23 18:00:55 +00001612SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1613 SDValue Chain = Op.getOperand(0);
1614 SDValue Cond = Op.getOperand(1);
1615 SDValue Jump = Op.getOperand(2);
1616
1617 return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1618 Chain, Jump, Cond);
1619}
1620
Tom Stellard75aadc22012-12-11 21:25:42 +00001621/// XXX Only kernel functions are supported, so we can assume for now that
1622/// every function is a kernel function, but in the future we should use
1623/// separate calling conventions for kernel and non-kernel functions.
1624SDValue R600TargetLowering::LowerFormalArguments(
1625 SDValue Chain,
1626 CallingConv::ID CallConv,
1627 bool isVarArg,
1628 const SmallVectorImpl<ISD::InputArg> &Ins,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001629 SDLoc DL, SelectionDAG &DAG,
Tom Stellard75aadc22012-12-11 21:25:42 +00001630 SmallVectorImpl<SDValue> &InVals) const {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001631 SmallVector<CCValAssign, 16> ArgLocs;
Eric Christopherb5217502014-08-06 18:45:26 +00001632 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1633 *DAG.getContext());
Vincent Lejeunef143af32013-11-11 22:10:24 +00001634 MachineFunction &MF = DAG.getMachineFunction();
Jan Veselye5121f32014-10-14 20:05:26 +00001635 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellardacfeebf2013-07-23 01:48:05 +00001636
Tom Stellardaf775432013-10-23 00:44:32 +00001637 SmallVector<ISD::InputArg, 8> LocalIns;
1638
Matt Arsenault209a7b92014-04-18 07:40:20 +00001639 getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);
Tom Stellardaf775432013-10-23 00:44:32 +00001640
1641 AnalyzeFormalArguments(CCInfo, LocalIns);
Tom Stellardacfeebf2013-07-23 01:48:05 +00001642
Tom Stellard1e803092013-07-23 01:48:18 +00001643 for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001644 CCValAssign &VA = ArgLocs[i];
Matt Arsenault74ef2772014-08-13 18:14:11 +00001645 const ISD::InputArg &In = Ins[i];
1646 EVT VT = In.VT;
1647 EVT MemVT = VA.getLocVT();
1648 if (!VT.isVector() && MemVT.isVector()) {
1649 // Get load source type if scalarized.
1650 MemVT = MemVT.getVectorElementType();
1651 }
Tom Stellard78e01292013-07-23 01:47:58 +00001652
Jan Veselye5121f32014-10-14 20:05:26 +00001653 if (MFI->getShaderType() != ShaderType::COMPUTE) {
Vincent Lejeunef143af32013-11-11 22:10:24 +00001654 unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
1655 SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
1656 InVals.push_back(Register);
1657 continue;
1658 }
1659
Tom Stellard75aadc22012-12-11 21:25:42 +00001660 PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Matt Arsenault74ef2772014-08-13 18:14:11 +00001661 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellardacfeebf2013-07-23 01:48:05 +00001662
Matt Arsenaultfae02982014-03-17 18:58:11 +00001663 // i64 isn't a legal type, so the register type used ends up as i32, which
1664 // isn't expected here. It attempts to create this sextload, but it ends up
1665 // being invalid. Somehow this seems to work with i64 arguments, but breaks
1666 // for <1 x i64>.
1667
Tom Stellardacfeebf2013-07-23 01:48:05 +00001668 // The first 36 bytes of the input buffer contains information about
1669 // thread group and global sizes.
Matt Arsenault74ef2772014-08-13 18:14:11 +00001670 ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
1671 if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
1672 // FIXME: This should really check the extload type, but the handling of
1673 // extload vector parameters seems to be broken.
Matt Arsenaulte1f030c2014-04-11 20:59:54 +00001674
Matt Arsenault74ef2772014-08-13 18:14:11 +00001675 // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
1676 Ext = ISD::SEXTLOAD;
1677 }
1678
1679 // Compute the offset from the value.
1680 // XXX - I think PartOffset should give you this, but it seems to give the
1681 // size of the register which isn't useful.
1682
Andrew Trick05938a52015-02-16 18:10:47 +00001683 unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
Matt Arsenault74ef2772014-08-13 18:14:11 +00001684 unsigned PartOffset = VA.getLocMemOffset();
Jan Veselye5121f32014-10-14 20:05:26 +00001685 unsigned Offset = 36 + VA.getLocMemOffset();
Matt Arsenault74ef2772014-08-13 18:14:11 +00001686
1687 MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
1688 SDValue Arg = DAG.getLoad(ISD::UNINDEXED, Ext, VT, DL, Chain,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001689 DAG.getConstant(Offset, DL, MVT::i32),
Matt Arsenault74ef2772014-08-13 18:14:11 +00001690 DAG.getUNDEF(MVT::i32),
1691 PtrInfo,
1692 MemVT, false, true, true, 4);
Matt Arsenault209a7b92014-04-18 07:40:20 +00001693
1694 // 4 is the preferred alignment for the CONSTANT memory space.
Tom Stellard75aadc22012-12-11 21:25:42 +00001695 InVals.push_back(Arg);
Jan Veselye5121f32014-10-14 20:05:26 +00001696 MFI->ABIArgOffset = Offset + MemVT.getStoreSize();
Tom Stellard75aadc22012-12-11 21:25:42 +00001697 }
1698 return Chain;
1699}
1700
Mehdi Amini44ede332015-07-09 02:09:04 +00001701EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1702 EVT VT) const {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001703 if (!VT.isVector())
1704 return MVT::i32;
Tom Stellard75aadc22012-12-11 21:25:42 +00001705 return VT.changeVectorElementTypeToInteger();
1706}
1707
Matt Arsenault209a7b92014-04-18 07:40:20 +00001708static SDValue CompactSwizzlableVector(
1709 SelectionDAG &DAG, SDValue VectorEntry,
1710 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001711 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1712 assert(RemapSwizzle.empty());
1713 SDValue NewBldVec[4] = {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001714 VectorEntry.getOperand(0),
1715 VectorEntry.getOperand(1),
1716 VectorEntry.getOperand(2),
1717 VectorEntry.getOperand(3)
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001718 };
1719
1720 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeunefa58a5f2013-10-13 17:56:10 +00001721 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1722 // We mask write here to teach later passes that the ith element of this
1723 // vector is undef. Thus we can use it to reduce 128 bits reg usage,
1724 // break false dependencies and additionnaly make assembly easier to read.
1725 RemapSwizzle[i] = 7; // SEL_MASK_WRITE
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001726 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1727 if (C->isZero()) {
1728 RemapSwizzle[i] = 4; // SEL_0
1729 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1730 } else if (C->isExactlyValue(1.0)) {
1731 RemapSwizzle[i] = 5; // SEL_1
1732 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1733 }
1734 }
1735
1736 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1737 continue;
1738 for (unsigned j = 0; j < i; j++) {
1739 if (NewBldVec[i] == NewBldVec[j]) {
1740 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1741 RemapSwizzle[i] = j;
1742 break;
1743 }
1744 }
1745 }
1746
1747 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001748 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001749}
1750
Benjamin Kramer193960c2013-06-11 13:32:25 +00001751static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1752 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001753 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1754 assert(RemapSwizzle.empty());
1755 SDValue NewBldVec[4] = {
1756 VectorEntry.getOperand(0),
1757 VectorEntry.getOperand(1),
1758 VectorEntry.getOperand(2),
1759 VectorEntry.getOperand(3)
1760 };
1761 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001762 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001763 RemapSwizzle[i] = i;
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001764 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1765 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1766 ->getZExtValue();
1767 if (i == Idx)
1768 isUnmovable[Idx] = true;
1769 }
1770 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001771
1772 for (unsigned i = 0; i < 4; i++) {
1773 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1774 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1775 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001776 if (isUnmovable[Idx])
1777 continue;
1778 // Swap i and Idx
1779 std::swap(NewBldVec[Idx], NewBldVec[i]);
1780 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1781 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001782 }
1783 }
1784
1785 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001786 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001787}
1788
1789
1790SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001791 SDValue Swz[4], SelectionDAG &DAG,
1792 SDLoc DL) const {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001793 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1794 // Old -> New swizzle values
1795 DenseMap<unsigned, unsigned> SwizzleRemap;
1796
1797 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1798 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001799 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001800 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001801 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001802 }
1803
1804 SwizzleRemap.clear();
1805 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1806 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001807 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001808 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001809 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001810 }
1811
1812 return BuildVector;
1813}
1814
1815
Tom Stellard75aadc22012-12-11 21:25:42 +00001816//===----------------------------------------------------------------------===//
1817// Custom DAG Optimizations
1818//===----------------------------------------------------------------------===//
1819
1820SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1821 DAGCombinerInfo &DCI) const {
1822 SelectionDAG &DAG = DCI.DAG;
1823
1824 switch (N->getOpcode()) {
Tom Stellard50122a52014-04-07 19:45:41 +00001825 default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00001826 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1827 case ISD::FP_ROUND: {
1828 SDValue Arg = N->getOperand(0);
1829 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001830 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001831 Arg.getOperand(0));
1832 }
1833 break;
1834 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001835
1836 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1837 // (i32 select_cc f32, f32, -1, 0 cc)
1838 //
1839 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1840 // this to one of the SET*_DX10 instructions.
1841 case ISD::FP_TO_SINT: {
1842 SDValue FNeg = N->getOperand(0);
1843 if (FNeg.getOpcode() != ISD::FNEG) {
1844 return SDValue();
1845 }
1846 SDValue SelectCC = FNeg.getOperand(0);
1847 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1848 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1849 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1850 !isHWTrueValue(SelectCC.getOperand(2)) ||
1851 !isHWFalseValue(SelectCC.getOperand(3))) {
1852 return SDValue();
1853 }
1854
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001855 SDLoc dl(N);
1856 return DAG.getNode(ISD::SELECT_CC, dl, N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001857 SelectCC.getOperand(0), // LHS
1858 SelectCC.getOperand(1), // RHS
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001859 DAG.getConstant(-1, dl, MVT::i32), // True
1860 DAG.getConstant(0, dl, MVT::i32), // False
Tom Stellarde06163a2013-02-07 14:02:35 +00001861 SelectCC.getOperand(4)); // CC
1862
1863 break;
1864 }
Quentin Colombete2e05482013-07-30 00:27:16 +00001865
NAKAMURA Takumi8a046432013-10-28 04:07:38 +00001866 // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1867 // => build_vector elt0, ... , NewEltIdx, ... , eltN
Quentin Colombete2e05482013-07-30 00:27:16 +00001868 case ISD::INSERT_VECTOR_ELT: {
1869 SDValue InVec = N->getOperand(0);
1870 SDValue InVal = N->getOperand(1);
1871 SDValue EltNo = N->getOperand(2);
1872 SDLoc dl(N);
1873
1874 // If the inserted element is an UNDEF, just use the input vector.
1875 if (InVal.getOpcode() == ISD::UNDEF)
1876 return InVec;
1877
1878 EVT VT = InVec.getValueType();
1879
1880 // If we can't generate a legal BUILD_VECTOR, exit
1881 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1882 return SDValue();
1883
1884 // Check that we know which element is being inserted
1885 if (!isa<ConstantSDNode>(EltNo))
1886 return SDValue();
1887 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1888
1889 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1890 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1891 // vector elements.
1892 SmallVector<SDValue, 8> Ops;
1893 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1894 Ops.append(InVec.getNode()->op_begin(),
1895 InVec.getNode()->op_end());
1896 } else if (InVec.getOpcode() == ISD::UNDEF) {
1897 unsigned NElts = VT.getVectorNumElements();
1898 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1899 } else {
1900 return SDValue();
1901 }
1902
1903 // Insert the element
1904 if (Elt < Ops.size()) {
1905 // All the operands of BUILD_VECTOR must have the same type;
1906 // we enforce that here.
1907 EVT OpVT = Ops[0].getValueType();
1908 if (InVal.getValueType() != OpVT)
1909 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1910 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
1911 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
1912 Ops[Elt] = InVal;
1913 }
1914
1915 // Return the new vector
Craig Topper48d114b2014-04-26 18:35:24 +00001916 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
Quentin Colombete2e05482013-07-30 00:27:16 +00001917 }
1918
Tom Stellard365366f2013-01-23 02:09:06 +00001919 // Extract_vec (Build_vector) generated by custom lowering
1920 // also needs to be customly combined
1921 case ISD::EXTRACT_VECTOR_ELT: {
1922 SDValue Arg = N->getOperand(0);
1923 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1924 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1925 unsigned Element = Const->getZExtValue();
1926 return Arg->getOperand(Element);
1927 }
1928 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001929 if (Arg.getOpcode() == ISD::BITCAST &&
1930 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1931 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1932 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001933 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00001934 Arg->getOperand(0).getOperand(Element));
1935 }
1936 }
Tom Stellard365366f2013-01-23 02:09:06 +00001937 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001938
1939 case ISD::SELECT_CC: {
Tom Stellardafa8b532014-05-09 16:42:16 +00001940 // Try common optimizations
1941 SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
1942 if (Ret.getNode())
1943 return Ret;
1944
Tom Stellarde06163a2013-02-07 14:02:35 +00001945 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1946 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001947 //
1948 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1949 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001950 SDValue LHS = N->getOperand(0);
1951 if (LHS.getOpcode() != ISD::SELECT_CC) {
1952 return SDValue();
1953 }
1954
1955 SDValue RHS = N->getOperand(1);
1956 SDValue True = N->getOperand(2);
1957 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001958 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001959
1960 if (LHS.getOperand(2).getNode() != True.getNode() ||
1961 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00001962 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00001963 return SDValue();
1964 }
1965
Tom Stellard5e524892013-03-08 15:37:11 +00001966 switch (NCC) {
1967 default: return SDValue();
1968 case ISD::SETNE: return LHS;
1969 case ISD::SETEQ: {
1970 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1971 LHSCC = ISD::getSetCCInverse(LHSCC,
1972 LHS.getOperand(0).getValueType().isInteger());
Tom Stellardcd428182013-09-28 02:50:38 +00001973 if (DCI.isBeforeLegalizeOps() ||
1974 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1975 return DAG.getSelectCC(SDLoc(N),
1976 LHS.getOperand(0),
1977 LHS.getOperand(1),
1978 LHS.getOperand(2),
1979 LHS.getOperand(3),
1980 LHSCC);
1981 break;
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001982 }
Tom Stellard5e524892013-03-08 15:37:11 +00001983 }
Tom Stellardcd428182013-09-28 02:50:38 +00001984 return SDValue();
Tom Stellard5e524892013-03-08 15:37:11 +00001985 }
Tom Stellardfbab8272013-08-16 01:12:11 +00001986
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001987 case AMDGPUISD::EXPORT: {
1988 SDValue Arg = N->getOperand(1);
1989 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1990 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001991
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001992 SDValue NewArgs[8] = {
1993 N->getOperand(0), // Chain
1994 SDValue(),
1995 N->getOperand(2), // ArrayBase
1996 N->getOperand(3), // Type
1997 N->getOperand(4), // SWZ_X
1998 N->getOperand(5), // SWZ_Y
1999 N->getOperand(6), // SWZ_Z
2000 N->getOperand(7) // SWZ_W
2001 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00002002 SDLoc DL(N);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002003 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
Craig Topper48d114b2014-04-26 18:35:24 +00002004 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
Tom Stellarde06163a2013-02-07 14:02:35 +00002005 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002006 case AMDGPUISD::TEXTURE_FETCH: {
2007 SDValue Arg = N->getOperand(1);
2008 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2009 break;
2010
2011 SDValue NewArgs[19] = {
2012 N->getOperand(0),
2013 N->getOperand(1),
2014 N->getOperand(2),
2015 N->getOperand(3),
2016 N->getOperand(4),
2017 N->getOperand(5),
2018 N->getOperand(6),
2019 N->getOperand(7),
2020 N->getOperand(8),
2021 N->getOperand(9),
2022 N->getOperand(10),
2023 N->getOperand(11),
2024 N->getOperand(12),
2025 N->getOperand(13),
2026 N->getOperand(14),
2027 N->getOperand(15),
2028 N->getOperand(16),
2029 N->getOperand(17),
2030 N->getOperand(18),
2031 };
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002032 SDLoc DL(N);
2033 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
2034 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002035 }
Tom Stellard75aadc22012-12-11 21:25:42 +00002036 }
Matt Arsenault5565f65e2014-05-22 18:09:07 +00002037
2038 return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00002039}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002040
2041static bool
2042FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002043 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Eric Christopherfc6de422014-08-05 02:39:49 +00002044 const R600InstrInfo *TII =
2045 static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002046 if (!Src.isMachineOpcode())
2047 return false;
2048 switch (Src.getMachineOpcode()) {
2049 case AMDGPU::FNEG_R600:
2050 if (!Neg.getNode())
2051 return false;
2052 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002053 Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002054 return true;
2055 case AMDGPU::FABS_R600:
2056 if (!Abs.getNode())
2057 return false;
2058 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002059 Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002060 return true;
2061 case AMDGPU::CONST_COPY: {
2062 unsigned Opcode = ParentNode->getMachineOpcode();
2063 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2064
2065 if (!Sel.getNode())
2066 return false;
2067
2068 SDValue CstOffset = Src.getOperand(0);
2069 if (ParentNode->getValueType(0).isVector())
2070 return false;
2071
2072 // Gather constants values
2073 int SrcIndices[] = {
2074 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2075 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2076 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
2077 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2078 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2079 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2080 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2081 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2082 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2083 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2084 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
2085 };
2086 std::vector<unsigned> Consts;
Matt Arsenault4d64f962014-05-12 19:23:21 +00002087 for (int OtherSrcIdx : SrcIndices) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002088 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2089 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2090 continue;
2091 if (HasDst) {
2092 OtherSrcIdx--;
2093 OtherSelIdx--;
2094 }
2095 if (RegisterSDNode *Reg =
2096 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2097 if (Reg->getReg() == AMDGPU::ALU_CONST) {
Matt Arsenaultb3ee3882014-05-12 19:26:38 +00002098 ConstantSDNode *Cst
2099 = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002100 Consts.push_back(Cst->getZExtValue());
2101 }
2102 }
2103 }
2104
Matt Arsenault37c12d72014-05-12 20:42:57 +00002105 ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002106 Consts.push_back(Cst->getZExtValue());
2107 if (!TII->fitsConstReadLimitations(Consts)) {
2108 return false;
2109 }
2110
2111 Sel = CstOffset;
2112 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
2113 return true;
2114 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002115 case AMDGPU::MOV_IMM_I32:
2116 case AMDGPU::MOV_IMM_F32: {
2117 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
2118 uint64_t ImmValue = 0;
2119
2120
2121 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
2122 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
2123 float FloatValue = FPC->getValueAPF().convertToFloat();
2124 if (FloatValue == 0.0) {
2125 ImmReg = AMDGPU::ZERO;
2126 } else if (FloatValue == 0.5) {
2127 ImmReg = AMDGPU::HALF;
2128 } else if (FloatValue == 1.0) {
2129 ImmReg = AMDGPU::ONE;
2130 } else {
2131 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2132 }
2133 } else {
2134 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
2135 uint64_t Value = C->getZExtValue();
2136 if (Value == 0) {
2137 ImmReg = AMDGPU::ZERO;
2138 } else if (Value == 1) {
2139 ImmReg = AMDGPU::ONE_INT;
2140 } else {
2141 ImmValue = Value;
2142 }
2143 }
2144
2145 // Check that we aren't already using an immediate.
2146 // XXX: It's possible for an instruction to have more than one
2147 // immediate operand, but this is not supported yet.
2148 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
2149 if (!Imm.getNode())
2150 return false;
2151 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
2152 assert(C);
2153 if (C->getZExtValue())
2154 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002155 Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002156 }
2157 Src = DAG.getRegister(ImmReg, MVT::i32);
2158 return true;
2159 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002160 default:
2161 return false;
2162 }
2163}
2164
2165
2166/// \brief Fold the instructions after selecting them
2167SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2168 SelectionDAG &DAG) const {
Eric Christopherfc6de422014-08-05 02:39:49 +00002169 const R600InstrInfo *TII =
2170 static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002171 if (!Node->isMachineOpcode())
2172 return Node;
2173 unsigned Opcode = Node->getMachineOpcode();
2174 SDValue FakeOp;
2175
Benjamin Kramer6cd780f2015-02-17 15:29:18 +00002176 std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002177
2178 if (Opcode == AMDGPU::DOT_4) {
2179 int OperandIdx[] = {
2180 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2181 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2182 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2183 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2184 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2185 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2186 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2187 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00002188 };
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002189 int NegIdx[] = {
2190 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
2191 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
2192 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
2193 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
2194 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
2195 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
2196 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
2197 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
2198 };
2199 int AbsIdx[] = {
2200 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
2201 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
2202 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
2203 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
2204 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
2205 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
2206 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
2207 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
2208 };
2209 for (unsigned i = 0; i < 8; i++) {
2210 if (OperandIdx[i] < 0)
2211 return Node;
2212 SDValue &Src = Ops[OperandIdx[i] - 1];
2213 SDValue &Neg = Ops[NegIdx[i] - 1];
2214 SDValue &Abs = Ops[AbsIdx[i] - 1];
2215 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2216 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2217 if (HasDst)
2218 SelIdx--;
2219 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002220 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2221 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2222 }
2223 } else if (Opcode == AMDGPU::REG_SEQUENCE) {
2224 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2225 SDValue &Src = Ops[i];
2226 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002227 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2228 }
Vincent Lejeune0167a312013-09-12 23:45:00 +00002229 } else if (Opcode == AMDGPU::CLAMP_R600) {
2230 SDValue Src = Node->getOperand(0);
2231 if (!Src.isMachineOpcode() ||
2232 !TII->hasInstrModifiers(Src.getMachineOpcode()))
2233 return Node;
2234 int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
2235 AMDGPU::OpName::clamp);
2236 if (ClampIdx < 0)
2237 return Node;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002238 SDLoc DL(Node);
Benjamin Kramer6cd780f2015-02-17 15:29:18 +00002239 std::vector<SDValue> Ops(Src->op_begin(), Src->op_end());
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002240 Ops[ClampIdx - 1] = DAG.getTargetConstant(1, DL, MVT::i32);
2241 return DAG.getMachineNode(Src.getMachineOpcode(), DL,
2242 Node->getVTList(), Ops);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002243 } else {
2244 if (!TII->hasInstrModifiers(Opcode))
2245 return Node;
2246 int OperandIdx[] = {
2247 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2248 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2249 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
2250 };
2251 int NegIdx[] = {
2252 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
2253 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
2254 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
2255 };
2256 int AbsIdx[] = {
2257 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
2258 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
2259 -1
2260 };
2261 for (unsigned i = 0; i < 3; i++) {
2262 if (OperandIdx[i] < 0)
2263 return Node;
2264 SDValue &Src = Ops[OperandIdx[i] - 1];
2265 SDValue &Neg = Ops[NegIdx[i] - 1];
2266 SDValue FakeAbs;
2267 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2268 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2269 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002270 int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
2271 if (HasDst) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002272 SelIdx--;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002273 ImmIdx--;
2274 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002275 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002276 SDValue &Imm = Ops[ImmIdx];
2277 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002278 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2279 }
2280 }
2281
2282 return Node;
2283}