//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//

15#include "R600ISelLowering.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000016#include "AMDGPUFrameLowering.h"
Matt Arsenaultc791f392014-06-23 18:00:31 +000017#include "AMDGPUIntrinsicInfo.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000018#include "AMDGPUSubtarget.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000019#include "R600Defines.h"
20#include "R600InstrInfo.h"
21#include "R600MachineFunctionInfo.h"
Tom Stellard067c8152014-07-21 14:01:14 +000022#include "llvm/Analysis/ValueTracking.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000023#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000024#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000025#include "llvm/CodeGen/MachineInstrBuilder.h"
26#include "llvm/CodeGen/MachineRegisterInfo.h"
27#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000028#include "llvm/IR/Argument.h"
29#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000030
31using namespace llvm;
32
Eric Christopher7792e322015-01-30 23:24:40 +000033R600TargetLowering::R600TargetLowering(TargetMachine &TM,
34 const AMDGPUSubtarget &STI)
35 : AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000036 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
37 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
38 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
39 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000040 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
41 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
42
Eric Christopher23a3a7c2015-02-26 00:00:24 +000043 computeRegisterProperties(STI.getRegisterInfo());
Tom Stellard75aadc22012-12-11 21:25:42 +000044
Tom Stellard0351ea22013-09-28 02:50:50 +000045 // Set condition code actions
46 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
47 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000048 setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000049 setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000050 setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
51 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000052 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
53 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
54 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
55 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000056 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
57 setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
58
59 setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
60 setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
61 setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
62 setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
63
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000064 setOperationAction(ISD::FCOS, MVT::f32, Custom);
65 setOperationAction(ISD::FSIN, MVT::f32, Custom);
66
Tom Stellard75aadc22012-12-11 21:25:42 +000067 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000068 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000069
Tom Stellard492ebea2013-03-08 15:37:07 +000070 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
71 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Matt Arsenault1d555c42014-06-23 18:00:55 +000072 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000073
74 setOperationAction(ISD::FSUB, MVT::f32, Expand);
75
76 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
77 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
78 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000079
Tom Stellard75aadc22012-12-11 21:25:42 +000080 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
81 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
82
Tom Stellarde8f9f282013-03-08 15:37:05 +000083 setOperationAction(ISD::SETCC, MVT::i32, Expand);
84 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000085 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
Jan Vesely2cb62ce2014-07-10 22:40:21 +000086 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
87 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000088
Tom Stellard53f2f902013-09-05 18:38:03 +000089 setOperationAction(ISD::SELECT, MVT::i32, Expand);
90 setOperationAction(ISD::SELECT, MVT::f32, Expand);
91 setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
Tom Stellard53f2f902013-09-05 18:38:03 +000092 setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000093
Jan Vesely808fff52015-04-30 17:15:56 +000094 // ADD, SUB overflow.
95 // TODO: turn these into Legal?
96 if (Subtarget->hasCARRY())
97 setOperationAction(ISD::UADDO, MVT::i32, Custom);
98
99 if (Subtarget->hasBORROW())
100 setOperationAction(ISD::USUBO, MVT::i32, Custom);
101
Matt Arsenault4e466652014-04-16 01:41:30 +0000102 // Expand sign extension of vectors
103 if (!Subtarget->hasBFE())
104 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
105
106 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
107 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);
108
109 if (!Subtarget->hasBFE())
110 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
111 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
112 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);
113
114 if (!Subtarget->hasBFE())
115 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
116 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
117 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);
118
119 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
120 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
121 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);
122
123 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
124
125
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000126 // Legalize loads and stores to the private address space.
127 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000128 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000129 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000130
131 // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
132 // spaces, so it is custom lowered to handle those where it isn't.
Ahmed Bougacha2b6917b2015-01-08 00:51:32 +0000133 for (MVT VT : MVT::integer_valuetypes()) {
134 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
135 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
136 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);
Matt Arsenault2a495972014-11-23 02:57:54 +0000137
Ahmed Bougacha2b6917b2015-01-08 00:51:32 +0000138 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
139 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
140 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);
Matt Arsenault2a495972014-11-23 02:57:54 +0000141
Ahmed Bougacha2b6917b2015-01-08 00:51:32 +0000142 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
143 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
144 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
145 }
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000146
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000147 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000148 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000149 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000150 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
Tom Stellardd3ee8c12013-08-16 01:12:06 +0000151 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
152 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000153
Tom Stellard365366f2013-01-23 02:09:06 +0000154 setOperationAction(ISD::LOAD, MVT::i32, Custom);
155 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000156 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
157
Tom Stellard880a80a2014-06-17 16:53:14 +0000158 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
159 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
160 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
161 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
162
163 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
164 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
165 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
166 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
167
Tom Stellard75aadc22012-12-11 21:25:42 +0000168 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +0000169 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +0000170 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +0000171 setTargetDAGCombine(ISD::SELECT_CC);
Quentin Colombete2e05482013-07-30 00:27:16 +0000172 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000173
Jan Vesely25f36272014-06-18 12:27:13 +0000174 // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
175 // to be Legal/Custom in order to avoid library calls.
176 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
Jan Vesely900ff2e2014-06-18 12:27:15 +0000177 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
Jan Veselyecf51332014-06-18 12:27:17 +0000178 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
Jan Vesely25f36272014-06-18 12:27:13 +0000179
Michel Danzer49812b52013-07-10 16:37:07 +0000180 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
181
Matt Arsenaultc4d3d3a2014-06-23 18:00:49 +0000182 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
183 for (MVT VT : ScalarIntVTs) {
184 setOperationAction(ISD::ADDC, VT, Expand);
185 setOperationAction(ISD::SUBC, VT, Expand);
186 setOperationAction(ISD::ADDE, VT, Expand);
187 setOperationAction(ISD::SUBE, VT, Expand);
188 }
189
Tom Stellardfc455472013-08-12 22:33:21 +0000190 setSchedulingPreference(Sched::Source);
Tom Stellard75aadc22012-12-11 21:25:42 +0000191}
192
193MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
194 MachineInstr * MI, MachineBasicBlock * BB) const {
195 MachineFunction * MF = BB->getParent();
196 MachineRegisterInfo &MRI = MF->getRegInfo();
197 MachineBasicBlock::iterator I = *MI;
Eric Christopherfc6de422014-08-05 02:39:49 +0000198 const R600InstrInfo *TII =
Eric Christopher7792e322015-01-30 23:24:40 +0000199 static_cast<const R600InstrInfo *>(Subtarget->getInstrInfo());
Tom Stellard75aadc22012-12-11 21:25:42 +0000200
201 switch (MI->getOpcode()) {
Tom Stellardc6f4a292013-08-26 15:05:59 +0000202 default:
Tom Stellard8f9fc202013-11-15 00:12:45 +0000203 // Replace LDS_*_RET instruction that don't have any uses with the
204 // equivalent LDS_*_NORET instruction.
205 if (TII->isLDSRetInstr(MI->getOpcode())) {
Tom Stellard13c68ef2013-09-05 18:38:09 +0000206 int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
207 assert(DstIdx != -1);
208 MachineInstrBuilder NewMI;
Aaron Watry1885e532014-09-11 15:02:54 +0000209 // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
210 // LDS_1A2D support and remove this special case.
211 if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()) ||
212 MI->getOpcode() == AMDGPU::LDS_CMPST_RET)
Tom Stellard8f9fc202013-11-15 00:12:45 +0000213 return BB;
214
215 NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
216 TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
Tom Stellardc6f4a292013-08-26 15:05:59 +0000217 for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
218 NewMI.addOperand(MI->getOperand(i));
219 }
Tom Stellardc6f4a292013-08-26 15:05:59 +0000220 } else {
221 return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
222 }
223 break;
Tom Stellard75aadc22012-12-11 21:25:42 +0000224 case AMDGPU::CLAMP_R600: {
225 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
226 AMDGPU::MOV,
227 MI->getOperand(0).getReg(),
228 MI->getOperand(1).getReg());
229 TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
230 break;
231 }
232
233 case AMDGPU::FABS_R600: {
234 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
235 AMDGPU::MOV,
236 MI->getOperand(0).getReg(),
237 MI->getOperand(1).getReg());
238 TII->addFlag(NewMI, 0, MO_FLAG_ABS);
239 break;
240 }
241
242 case AMDGPU::FNEG_R600: {
243 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
244 AMDGPU::MOV,
245 MI->getOperand(0).getReg(),
246 MI->getOperand(1).getReg());
247 TII->addFlag(NewMI, 0, MO_FLAG_NEG);
248 break;
249 }
250
Tom Stellard75aadc22012-12-11 21:25:42 +0000251 case AMDGPU::MASK_WRITE: {
252 unsigned maskedRegister = MI->getOperand(0).getReg();
253 assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
254 MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
255 TII->addFlag(defInstr, 0, MO_FLAG_MASK);
256 break;
257 }
258
259 case AMDGPU::MOV_IMM_F32:
260 TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
261 MI->getOperand(1).getFPImm()->getValueAPF()
262 .bitcastToAPInt().getZExtValue());
263 break;
264 case AMDGPU::MOV_IMM_I32:
265 TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
266 MI->getOperand(1).getImm());
267 break;
Vincent Lejeune0b72f102013-03-05 15:04:55 +0000268 case AMDGPU::CONST_COPY: {
269 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
270 MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
Tom Stellard02661d92013-06-25 21:22:18 +0000271 TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
Vincent Lejeune0b72f102013-03-05 15:04:55 +0000272 MI->getOperand(1).getImm());
273 break;
274 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000275
276 case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
Tom Stellard0344cdf2013-08-01 15:23:42 +0000277 case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
Tom Stellard75aadc22012-12-11 21:25:42 +0000278 case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +0000279 unsigned EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
Tom Stellard75aadc22012-12-11 21:25:42 +0000280
281 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
282 .addOperand(MI->getOperand(0))
283 .addOperand(MI->getOperand(1))
284 .addImm(EOP); // Set End of program bit
285 break;
286 }
287
Tom Stellard75aadc22012-12-11 21:25:42 +0000288 case AMDGPU::TXD: {
289 unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
290 unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000291 MachineOperand &RID = MI->getOperand(4);
292 MachineOperand &SID = MI->getOperand(5);
293 unsigned TextureId = MI->getOperand(6).getImm();
294 unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
295 unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
Tom Stellard75aadc22012-12-11 21:25:42 +0000296
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000297 switch (TextureId) {
298 case 5: // Rect
299 CTX = CTY = 0;
300 break;
301 case 6: // Shadow1D
302 SrcW = SrcZ;
303 break;
304 case 7: // Shadow2D
305 SrcW = SrcZ;
306 break;
307 case 8: // ShadowRect
308 CTX = CTY = 0;
309 SrcW = SrcZ;
310 break;
311 case 9: // 1DArray
312 SrcZ = SrcY;
313 CTZ = 0;
314 break;
315 case 10: // 2DArray
316 CTZ = 0;
317 break;
318 case 11: // Shadow1DArray
319 SrcZ = SrcY;
320 CTZ = 0;
321 break;
322 case 12: // Shadow2DArray
323 CTZ = 0;
324 break;
325 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000326 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
327 .addOperand(MI->getOperand(3))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000328 .addImm(SrcX)
329 .addImm(SrcY)
330 .addImm(SrcZ)
331 .addImm(SrcW)
332 .addImm(0)
333 .addImm(0)
334 .addImm(0)
335 .addImm(0)
336 .addImm(1)
337 .addImm(2)
338 .addImm(3)
339 .addOperand(RID)
340 .addOperand(SID)
341 .addImm(CTX)
342 .addImm(CTY)
343 .addImm(CTZ)
344 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000345 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
346 .addOperand(MI->getOperand(2))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000347 .addImm(SrcX)
348 .addImm(SrcY)
349 .addImm(SrcZ)
350 .addImm(SrcW)
351 .addImm(0)
352 .addImm(0)
353 .addImm(0)
354 .addImm(0)
355 .addImm(1)
356 .addImm(2)
357 .addImm(3)
358 .addOperand(RID)
359 .addOperand(SID)
360 .addImm(CTX)
361 .addImm(CTY)
362 .addImm(CTZ)
363 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000364 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
365 .addOperand(MI->getOperand(0))
366 .addOperand(MI->getOperand(1))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000367 .addImm(SrcX)
368 .addImm(SrcY)
369 .addImm(SrcZ)
370 .addImm(SrcW)
371 .addImm(0)
372 .addImm(0)
373 .addImm(0)
374 .addImm(0)
375 .addImm(1)
376 .addImm(2)
377 .addImm(3)
378 .addOperand(RID)
379 .addOperand(SID)
380 .addImm(CTX)
381 .addImm(CTY)
382 .addImm(CTZ)
383 .addImm(CTW)
Tom Stellard75aadc22012-12-11 21:25:42 +0000384 .addReg(T0, RegState::Implicit)
385 .addReg(T1, RegState::Implicit);
386 break;
387 }
388
389 case AMDGPU::TXD_SHADOW: {
390 unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
391 unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000392 MachineOperand &RID = MI->getOperand(4);
393 MachineOperand &SID = MI->getOperand(5);
394 unsigned TextureId = MI->getOperand(6).getImm();
395 unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
396 unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
397
398 switch (TextureId) {
399 case 5: // Rect
400 CTX = CTY = 0;
401 break;
402 case 6: // Shadow1D
403 SrcW = SrcZ;
404 break;
405 case 7: // Shadow2D
406 SrcW = SrcZ;
407 break;
408 case 8: // ShadowRect
409 CTX = CTY = 0;
410 SrcW = SrcZ;
411 break;
412 case 9: // 1DArray
413 SrcZ = SrcY;
414 CTZ = 0;
415 break;
416 case 10: // 2DArray
417 CTZ = 0;
418 break;
419 case 11: // Shadow1DArray
420 SrcZ = SrcY;
421 CTZ = 0;
422 break;
423 case 12: // Shadow2DArray
424 CTZ = 0;
425 break;
426 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000427
428 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
429 .addOperand(MI->getOperand(3))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000430 .addImm(SrcX)
431 .addImm(SrcY)
432 .addImm(SrcZ)
433 .addImm(SrcW)
434 .addImm(0)
435 .addImm(0)
436 .addImm(0)
437 .addImm(0)
438 .addImm(1)
439 .addImm(2)
440 .addImm(3)
441 .addOperand(RID)
442 .addOperand(SID)
443 .addImm(CTX)
444 .addImm(CTY)
445 .addImm(CTZ)
446 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000447 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
448 .addOperand(MI->getOperand(2))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000449 .addImm(SrcX)
450 .addImm(SrcY)
451 .addImm(SrcZ)
452 .addImm(SrcW)
453 .addImm(0)
454 .addImm(0)
455 .addImm(0)
456 .addImm(0)
457 .addImm(1)
458 .addImm(2)
459 .addImm(3)
460 .addOperand(RID)
461 .addOperand(SID)
462 .addImm(CTX)
463 .addImm(CTY)
464 .addImm(CTZ)
465 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000466 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
467 .addOperand(MI->getOperand(0))
468 .addOperand(MI->getOperand(1))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000469 .addImm(SrcX)
470 .addImm(SrcY)
471 .addImm(SrcZ)
472 .addImm(SrcW)
473 .addImm(0)
474 .addImm(0)
475 .addImm(0)
476 .addImm(0)
477 .addImm(1)
478 .addImm(2)
479 .addImm(3)
480 .addOperand(RID)
481 .addOperand(SID)
482 .addImm(CTX)
483 .addImm(CTY)
484 .addImm(CTZ)
485 .addImm(CTW)
Tom Stellard75aadc22012-12-11 21:25:42 +0000486 .addReg(T0, RegState::Implicit)
487 .addReg(T1, RegState::Implicit);
488 break;
489 }
490
491 case AMDGPU::BRANCH:
492 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000493 .addOperand(MI->getOperand(0));
Tom Stellard75aadc22012-12-11 21:25:42 +0000494 break;
495
496 case AMDGPU::BRANCH_COND_f32: {
497 MachineInstr *NewMI =
498 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
499 AMDGPU::PREDICATE_BIT)
500 .addOperand(MI->getOperand(1))
501 .addImm(OPCODE_IS_NOT_ZERO)
502 .addImm(0); // Flags
503 TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000504 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
Tom Stellard75aadc22012-12-11 21:25:42 +0000505 .addOperand(MI->getOperand(0))
506 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
507 break;
508 }
509
510 case AMDGPU::BRANCH_COND_i32: {
511 MachineInstr *NewMI =
512 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
513 AMDGPU::PREDICATE_BIT)
514 .addOperand(MI->getOperand(1))
515 .addImm(OPCODE_IS_NOT_ZERO_INT)
516 .addImm(0); // Flags
517 TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000518 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
Tom Stellard75aadc22012-12-11 21:25:42 +0000519 .addOperand(MI->getOperand(0))
520 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
521 break;
522 }
523
Tom Stellard75aadc22012-12-11 21:25:42 +0000524 case AMDGPU::EG_ExportSwz:
525 case AMDGPU::R600_ExportSwz: {
Tom Stellard6f1b8652013-01-23 21:39:49 +0000526 // Instruction is left unmodified if its not the last one of its type
527 bool isLastInstructionOfItsType = true;
528 unsigned InstExportType = MI->getOperand(1).getImm();
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +0000529 for (MachineBasicBlock::iterator NextExportInst = std::next(I),
Tom Stellard6f1b8652013-01-23 21:39:49 +0000530 EndBlock = BB->end(); NextExportInst != EndBlock;
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +0000531 NextExportInst = std::next(NextExportInst)) {
Tom Stellard6f1b8652013-01-23 21:39:49 +0000532 if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
533 NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
534 unsigned CurrentInstExportType = NextExportInst->getOperand(1)
535 .getImm();
536 if (CurrentInstExportType == InstExportType) {
537 isLastInstructionOfItsType = false;
538 break;
539 }
540 }
541 }
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +0000542 bool EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
Tom Stellard6f1b8652013-01-23 21:39:49 +0000543 if (!EOP && !isLastInstructionOfItsType)
Tom Stellard75aadc22012-12-11 21:25:42 +0000544 return BB;
545 unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
546 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
547 .addOperand(MI->getOperand(0))
548 .addOperand(MI->getOperand(1))
549 .addOperand(MI->getOperand(2))
550 .addOperand(MI->getOperand(3))
551 .addOperand(MI->getOperand(4))
552 .addOperand(MI->getOperand(5))
553 .addOperand(MI->getOperand(6))
554 .addImm(CfInst)
Tom Stellard6f1b8652013-01-23 21:39:49 +0000555 .addImm(EOP);
Tom Stellard75aadc22012-12-11 21:25:42 +0000556 break;
557 }
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000558 case AMDGPU::RETURN: {
559 // RETURN instructions must have the live-out registers as implicit uses,
560 // otherwise they appear dead.
561 R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
562 MachineInstrBuilder MIB(*MF, MI);
563 for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
564 MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
565 return BB;
566 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000567 }
568
569 MI->eraseFromParent();
570 return BB;
571}
572
//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

Tom Stellard75aadc22012-12-11 21:25:42 +0000577SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000578 MachineFunction &MF = DAG.getMachineFunction();
579 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard75aadc22012-12-11 21:25:42 +0000580 switch (Op.getOpcode()) {
581 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Tom Stellard880a80a2014-06-17 16:53:14 +0000582 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
583 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
Jan Vesely25f36272014-06-18 12:27:13 +0000584 case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
Jan Veselyecf51332014-06-18 12:27:17 +0000585 case ISD::SRA_PARTS:
Jan Vesely900ff2e2014-06-18 12:27:15 +0000586 case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
Jan Vesely808fff52015-04-30 17:15:56 +0000587 case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
588 case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000589 case ISD::FCOS:
590 case ISD::FSIN: return LowerTrig(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000591 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000592 case ISD::STORE: return LowerSTORE(Op, DAG);
Matt Arsenaultd2c9e082014-07-07 18:34:45 +0000593 case ISD::LOAD: {
594 SDValue Result = LowerLOAD(Op, DAG);
595 assert((!Result.getNode() ||
596 Result.getNode()->getNumValues() == 2) &&
597 "Load should return a value and a chain");
598 return Result;
599 }
600
Matt Arsenault1d555c42014-06-23 18:00:55 +0000601 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000602 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000603 case ISD::INTRINSIC_VOID: {
604 SDValue Chain = Op.getOperand(0);
605 unsigned IntrinsicID =
606 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
607 switch (IntrinsicID) {
608 case AMDGPUIntrinsic::AMDGPU_store_output: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000609 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
610 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000611 MFI->LiveOuts.push_back(Reg);
Andrew Trickef9de2a2013-05-25 02:42:55 +0000612 return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
Tom Stellard75aadc22012-12-11 21:25:42 +0000613 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000614 case AMDGPUIntrinsic::R600_store_swizzle: {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000615 SDLoc DL(Op);
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000616 const SDValue Args[8] = {
617 Chain,
618 Op.getOperand(2), // Export Value
619 Op.getOperand(3), // ArrayBase
620 Op.getOperand(4), // Type
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000621 DAG.getConstant(0, DL, MVT::i32), // SWZ_X
622 DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
623 DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
624 DAG.getConstant(3, DL, MVT::i32) // SWZ_W
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000625 };
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000626 return DAG.getNode(AMDGPUISD::EXPORT, DL, Op.getValueType(), Args);
Tom Stellard75aadc22012-12-11 21:25:42 +0000627 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000628
Tom Stellard75aadc22012-12-11 21:25:42 +0000629 // default for switch(IntrinsicID)
630 default: break;
631 }
632 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
633 break;
634 }
635 case ISD::INTRINSIC_WO_CHAIN: {
636 unsigned IntrinsicID =
637 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
638 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000639 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000640 switch(IntrinsicID) {
641 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000642 case AMDGPUIntrinsic::R600_load_input: {
643 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
644 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
645 MachineFunction &MF = DAG.getMachineFunction();
646 MachineRegisterInfo &MRI = MF.getRegInfo();
647 MRI.addLiveIn(Reg);
648 return DAG.getCopyFromReg(DAG.getEntryNode(),
649 SDLoc(DAG.getEntryNode()), Reg, VT);
650 }
651
652 case AMDGPUIntrinsic::R600_interp_input: {
653 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
654 int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
655 MachineSDNode *interp;
656 if (ijb < 0) {
Eric Christopher7792e322015-01-30 23:24:40 +0000657 const R600InstrInfo *TII =
658 static_cast<const R600InstrInfo *>(Subtarget->getInstrInfo());
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000659 interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000660 MVT::v4f32, DAG.getTargetConstant(slot / 4, DL, MVT::i32));
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000661 return DAG.getTargetExtractSubreg(
662 TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
663 DL, MVT::f32, SDValue(interp, 0));
664 }
665 MachineFunction &MF = DAG.getMachineFunction();
666 MachineRegisterInfo &MRI = MF.getRegInfo();
667 unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
668 unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
669 MRI.addLiveIn(RegisterI);
670 MRI.addLiveIn(RegisterJ);
671 SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
672 SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
673 SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
674 SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);
675
676 if (slot % 4 < 2)
677 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000678 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4, DL, MVT::i32),
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000679 RegisterJNode, RegisterINode);
680 else
681 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000682 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4, DL, MVT::i32),
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000683 RegisterJNode, RegisterINode);
684 return SDValue(interp, slot % 2);
685 }
Vincent Lejeunef143af32013-11-11 22:10:24 +0000686 case AMDGPUIntrinsic::R600_interp_xy:
687 case AMDGPUIntrinsic::R600_interp_zw: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000688 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Tom Stellard41afe6a2013-02-05 17:09:14 +0000689 MachineSDNode *interp;
Vincent Lejeunef143af32013-11-11 22:10:24 +0000690 SDValue RegisterINode = Op.getOperand(2);
691 SDValue RegisterJNode = Op.getOperand(3);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000692
Vincent Lejeunef143af32013-11-11 22:10:24 +0000693 if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
Tom Stellard41afe6a2013-02-05 17:09:14 +0000694 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000695 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000696 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000697 else
698 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000699 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000700 RegisterJNode, RegisterINode);
Vincent Lejeunef143af32013-11-11 22:10:24 +0000701 return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
702 SDValue(interp, 0), SDValue(interp, 1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000703 }
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000704 case AMDGPUIntrinsic::R600_tex:
705 case AMDGPUIntrinsic::R600_texc:
706 case AMDGPUIntrinsic::R600_txl:
707 case AMDGPUIntrinsic::R600_txlc:
708 case AMDGPUIntrinsic::R600_txb:
709 case AMDGPUIntrinsic::R600_txbc:
710 case AMDGPUIntrinsic::R600_txf:
711 case AMDGPUIntrinsic::R600_txq:
712 case AMDGPUIntrinsic::R600_ddx:
Vincent Lejeune6df39432013-10-02 16:00:33 +0000713 case AMDGPUIntrinsic::R600_ddy:
714 case AMDGPUIntrinsic::R600_ldptr: {
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000715 unsigned TextureOp;
716 switch (IntrinsicID) {
717 case AMDGPUIntrinsic::R600_tex:
718 TextureOp = 0;
719 break;
720 case AMDGPUIntrinsic::R600_texc:
721 TextureOp = 1;
722 break;
723 case AMDGPUIntrinsic::R600_txl:
724 TextureOp = 2;
725 break;
726 case AMDGPUIntrinsic::R600_txlc:
727 TextureOp = 3;
728 break;
729 case AMDGPUIntrinsic::R600_txb:
730 TextureOp = 4;
731 break;
732 case AMDGPUIntrinsic::R600_txbc:
733 TextureOp = 5;
734 break;
735 case AMDGPUIntrinsic::R600_txf:
736 TextureOp = 6;
737 break;
738 case AMDGPUIntrinsic::R600_txq:
739 TextureOp = 7;
740 break;
741 case AMDGPUIntrinsic::R600_ddx:
742 TextureOp = 8;
743 break;
744 case AMDGPUIntrinsic::R600_ddy:
745 TextureOp = 9;
746 break;
Vincent Lejeune6df39432013-10-02 16:00:33 +0000747 case AMDGPUIntrinsic::R600_ldptr:
748 TextureOp = 10;
749 break;
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000750 default:
751 llvm_unreachable("Unknow Texture Operation");
752 }
753
754 SDValue TexArgs[19] = {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000755 DAG.getConstant(TextureOp, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000756 Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000757 DAG.getConstant(0, DL, MVT::i32),
758 DAG.getConstant(1, DL, MVT::i32),
759 DAG.getConstant(2, DL, MVT::i32),
760 DAG.getConstant(3, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000761 Op.getOperand(2),
762 Op.getOperand(3),
763 Op.getOperand(4),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000764 DAG.getConstant(0, DL, MVT::i32),
765 DAG.getConstant(1, DL, MVT::i32),
766 DAG.getConstant(2, DL, MVT::i32),
767 DAG.getConstant(3, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000768 Op.getOperand(5),
769 Op.getOperand(6),
770 Op.getOperand(7),
771 Op.getOperand(8),
772 Op.getOperand(9),
773 Op.getOperand(10)
774 };
Craig Topper48d114b2014-04-26 18:35:24 +0000775 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000776 }
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000777 case AMDGPUIntrinsic::AMDGPU_dp4: {
778 SDValue Args[8] = {
779 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000780 DAG.getConstant(0, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000781 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000782 DAG.getConstant(0, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000783 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000784 DAG.getConstant(1, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000785 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000786 DAG.getConstant(1, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000787 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000788 DAG.getConstant(2, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000789 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000790 DAG.getConstant(2, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000791 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000792 DAG.getConstant(3, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000793 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000794 DAG.getConstant(3, DL, MVT::i32))
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000795 };
Craig Topper48d114b2014-04-26 18:35:24 +0000796 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000797 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000798
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000799 case Intrinsic::r600_read_ngroups_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000800 return LowerImplicitParameter(DAG, VT, DL, 0);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000801 case Intrinsic::r600_read_ngroups_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000802 return LowerImplicitParameter(DAG, VT, DL, 1);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000803 case Intrinsic::r600_read_ngroups_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000804 return LowerImplicitParameter(DAG, VT, DL, 2);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000805 case Intrinsic::r600_read_global_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000806 return LowerImplicitParameter(DAG, VT, DL, 3);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000807 case Intrinsic::r600_read_global_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000808 return LowerImplicitParameter(DAG, VT, DL, 4);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000809 case Intrinsic::r600_read_global_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000810 return LowerImplicitParameter(DAG, VT, DL, 5);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000811 case Intrinsic::r600_read_local_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000812 return LowerImplicitParameter(DAG, VT, DL, 6);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000813 case Intrinsic::r600_read_local_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000814 return LowerImplicitParameter(DAG, VT, DL, 7);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000815 case Intrinsic::r600_read_local_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000816 return LowerImplicitParameter(DAG, VT, DL, 8);
817
Jan Veselye5121f32014-10-14 20:05:26 +0000818 case Intrinsic::AMDGPU_read_workdim:
819 return LowerImplicitParameter(DAG, VT, DL, MFI->ABIArgOffset / 4);
820
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000821 case Intrinsic::r600_read_tgid_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000822 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
823 AMDGPU::T1_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000824 case Intrinsic::r600_read_tgid_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000825 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
826 AMDGPU::T1_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000827 case Intrinsic::r600_read_tgid_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000828 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
829 AMDGPU::T1_Z, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000830 case Intrinsic::r600_read_tidig_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000831 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
832 AMDGPU::T0_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000833 case Intrinsic::r600_read_tidig_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000834 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
835 AMDGPU::T0_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000836 case Intrinsic::r600_read_tidig_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000837 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
838 AMDGPU::T0_Z, VT);
Matt Arsenault257d48d2014-06-24 22:13:39 +0000839 case Intrinsic::AMDGPU_rsq:
840 // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
841 return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
Marek Olsak43650e42015-03-24 13:40:08 +0000842
843 case AMDGPUIntrinsic::AMDGPU_fract:
844 case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
845 return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000846 }
847 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
848 break;
849 }
850 } // end switch(Op.getOpcode())
851 return SDValue();
852}
853
/// Custom-widen/replace the results of node \p N whose result type is illegal
/// for R600, appending the legal replacement values to \p Results in result
/// order. Anything not special-cased here is delegated to the generic AMDGPU
/// implementation.
void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
    return;
  case ISD::FP_TO_UINT:
    if (N->getValueType(0) == MVT::i1) {
      // i1 result is lowered as an f32 "!= 0.0" compare.
      Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
      return;
    }
    // Fall-through. Since we don't care about out of bounds values
    // we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint
    // considers some extra cases which are not necessary here.
  case ISD::FP_TO_SINT: {
    SDValue Result;
    if (expandFP_TO_SINT(N, Result, DAG))
      Results.push_back(Result);
    return;
  }
  case ISD::SDIVREM: {
    // NOTE(review): the SDValue wraps result index 1 here while UDIVREM below
    // uses index 0; LowerSDIVREM presumably only looks at the node itself --
    // confirm before changing.
    SDValue Op = SDValue(N, 1);
    SDValue RES = LowerSDIVREM(Op, DAG);
    // Push both results of the divrem node.
    Results.push_back(RES);
    Results.push_back(RES.getValue(1));
    break;
  }
  case ISD::UDIVREM: {
    SDValue Op = SDValue(N, 0);
    // LowerUDIVREM64 appends both replacement values to Results itself.
    LowerUDIVREM64(Op, DAG, Results);
    break;
  }
  }
}
889
Tom Stellard880a80a2014-06-17 16:53:14 +0000890SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
891 SDValue Vector) const {
892
893 SDLoc DL(Vector);
894 EVT VecVT = Vector.getValueType();
895 EVT EltVT = VecVT.getVectorElementType();
896 SmallVector<SDValue, 8> Args;
897
898 for (unsigned i = 0, e = VecVT.getVectorNumElements();
899 i != e; ++i) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000900 Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
901 DAG.getConstant(i, DL, getVectorIdxTy())));
Tom Stellard880a80a2014-06-17 16:53:14 +0000902 }
903
904 return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
905}
906
907SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
908 SelectionDAG &DAG) const {
909
910 SDLoc DL(Op);
911 SDValue Vector = Op.getOperand(0);
912 SDValue Index = Op.getOperand(1);
913
914 if (isa<ConstantSDNode>(Index) ||
915 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
916 return Op;
917
918 Vector = vectorToVerticalVector(DAG, Vector);
919 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
920 Vector, Index);
921}
922
923SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
924 SelectionDAG &DAG) const {
925 SDLoc DL(Op);
926 SDValue Vector = Op.getOperand(0);
927 SDValue Value = Op.getOperand(1);
928 SDValue Index = Op.getOperand(2);
929
930 if (isa<ConstantSDNode>(Index) ||
931 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
932 return Op;
933
934 Vector = vectorToVerticalVector(DAG, Vector);
935 SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
936 Vector, Value, Index);
937 return vectorToVerticalVector(DAG, Insert);
938}
939
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000940SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
941 // On hw >= R700, COS/SIN input must be between -1. and 1.
942 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
943 EVT VT = Op.getValueType();
944 SDValue Arg = Op.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000945 SDLoc DL(Op);
946 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
947 DAG.getNode(ISD::FADD, DL, VT,
948 DAG.getNode(ISD::FMUL, DL, VT, Arg,
949 DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
950 DAG.getConstantFP(0.5, DL, MVT::f32)));
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000951 unsigned TrigNode;
952 switch (Op.getOpcode()) {
953 case ISD::FCOS:
954 TrigNode = AMDGPUISD::COS_HW;
955 break;
956 case ISD::FSIN:
957 TrigNode = AMDGPUISD::SIN_HW;
958 break;
959 default:
960 llvm_unreachable("Wrong trig opcode");
961 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000962 SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
963 DAG.getNode(ISD::FADD, DL, VT, FractPart,
964 DAG.getConstantFP(-0.5, DL, MVT::f32)));
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000965 if (Gen >= AMDGPUSubtarget::R700)
966 return TrigVal;
967 // On R600 hw, COS/SIN input must be between -Pi and Pi.
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000968 return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
969 DAG.getConstantFP(3.14159265359, DL, MVT::f32));
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000970}
971
Jan Vesely25f36272014-06-18 12:27:13 +0000972SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
973 SDLoc DL(Op);
974 EVT VT = Op.getValueType();
975
976 SDValue Lo = Op.getOperand(0);
977 SDValue Hi = Op.getOperand(1);
978 SDValue Shift = Op.getOperand(2);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000979 SDValue Zero = DAG.getConstant(0, DL, VT);
980 SDValue One = DAG.getConstant(1, DL, VT);
Jan Vesely25f36272014-06-18 12:27:13 +0000981
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000982 SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
983 SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
Jan Vesely25f36272014-06-18 12:27:13 +0000984 SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
985 SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
986
987 // The dance around Width1 is necessary for 0 special case.
988 // Without it the CompShift might be 32, producing incorrect results in
989 // Overflow. So we do the shift in two steps, the alternative is to
990 // add a conditional to filter the special case.
991
992 SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
993 Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);
994
995 SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
996 HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
997 SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);
998
999 SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
1000 SDValue LoBig = Zero;
1001
1002 Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
1003 Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
1004
1005 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
1006}
1007
Jan Vesely900ff2e2014-06-18 12:27:15 +00001008SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
1009 SDLoc DL(Op);
1010 EVT VT = Op.getValueType();
1011
1012 SDValue Lo = Op.getOperand(0);
1013 SDValue Hi = Op.getOperand(1);
1014 SDValue Shift = Op.getOperand(2);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001015 SDValue Zero = DAG.getConstant(0, DL, VT);
1016 SDValue One = DAG.getConstant(1, DL, VT);
Jan Vesely900ff2e2014-06-18 12:27:15 +00001017
Jan Veselyecf51332014-06-18 12:27:17 +00001018 const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;
1019
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001020 SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
1021 SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
Jan Vesely900ff2e2014-06-18 12:27:15 +00001022 SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
1023 SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
1024
1025 // The dance around Width1 is necessary for 0 special case.
1026 // Without it the CompShift might be 32, producing incorrect results in
1027 // Overflow. So we do the shift in two steps, the alternative is to
1028 // add a conditional to filter the special case.
1029
1030 SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
1031 Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);
1032
Jan Veselyecf51332014-06-18 12:27:17 +00001033 SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
Jan Vesely900ff2e2014-06-18 12:27:15 +00001034 SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
1035 LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);
1036
Jan Veselyecf51332014-06-18 12:27:17 +00001037 SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
1038 SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;
Jan Vesely900ff2e2014-06-18 12:27:15 +00001039
1040 Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
1041 Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
1042
1043 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
1044}
1045
Jan Vesely808fff52015-04-30 17:15:56 +00001046SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
1047 unsigned mainop, unsigned ovf) const {
1048 SDLoc DL(Op);
1049 EVT VT = Op.getValueType();
1050
1051 SDValue Lo = Op.getOperand(0);
1052 SDValue Hi = Op.getOperand(1);
1053
1054 SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
1055 // Extend sign.
1056 OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
1057 DAG.getValueType(MVT::i1));
1058
1059 SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);
1060
1061 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
1062}
1063
Tom Stellard75aadc22012-12-11 21:25:42 +00001064SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001065 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001066 return DAG.getNode(
1067 ISD::SETCC,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001068 DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001069 MVT::i1,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001070 Op, DAG.getConstantFP(0.0f, DL, MVT::f32),
Tom Stellard75aadc22012-12-11 21:25:42 +00001071 DAG.getCondCode(ISD::SETNE)
1072 );
1073}
1074
Tom Stellard75aadc22012-12-11 21:25:42 +00001075SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001076 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001077 unsigned DwordOffset) const {
1078 unsigned ByteOffset = DwordOffset * 4;
1079 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +00001080 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +00001081
1082 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
1083 assert(isInt<16>(ByteOffset));
1084
1085 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001086 DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
Tom Stellard75aadc22012-12-11 21:25:42 +00001087 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
1088 false, false, false, 0);
1089}
1090
Tom Stellard75aadc22012-12-11 21:25:42 +00001091bool R600TargetLowering::isZero(SDValue Op) const {
1092 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
1093 return Cst->isNullValue();
1094 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
1095 return CstFP->isZero();
1096 } else {
1097 return false;
1098 }
1099}
1100
/// Lower SELECT_CC for R600. Tries, in order: the legacy fmin/fmax combine,
/// a native SET* instruction (hardware true/false constants), a native CND*
/// instruction (compare against zero), and finally a generic expansion into
/// two SELECT_CC nodes.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // First see if the whole select folds to a legacy min/max operation.
  if (VT == MVT::f32) {
    DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
    SDValue MinMax = CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
    if (MinMax)
      return MinMax;
  }

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
     ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    // True/False are reversed relative to what SET* wants: invert the
    // condition (and swap operands if the inverse itself is not legal).
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        // Inverting the condition also requires swapping True/False.
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    // "Not equal" variants have no CND* form: invert the condition and swap
    // the select arms instead.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, DL, CompareVT);
    HWFalse = DAG.getConstant(0, DL, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
1242
Alp Tokercb402912014-01-24 17:20:08 +00001243/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001244/// convert these pointers to a register index. Each register holds
1245/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
1246/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
1247/// for indirect addressing.
1248SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1249 unsigned StackWidth,
1250 SelectionDAG &DAG) const {
1251 unsigned SRLPad;
1252 switch(StackWidth) {
1253 case 1:
1254 SRLPad = 2;
1255 break;
1256 case 2:
1257 SRLPad = 3;
1258 break;
1259 case 4:
1260 SRLPad = 4;
1261 break;
1262 default: llvm_unreachable("Invalid stack width");
1263 }
1264
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001265 SDLoc DL(Ptr);
1266 return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
1267 DAG.getConstant(SRLPad, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001268}
1269
/// For vector element \p ElemIdx of a stack access with the given
/// \p StackWidth, compute the sub-register \p Channel it occupies and the
/// pointer increment \p PtrIncr to apply before accessing it. Note that the
/// caller (see LowerSTORE) applies PtrIncr cumulatively, adding it to the
/// running pointer on every iteration, so PtrIncr is a per-step delta, not an
/// absolute offset.
void R600TargetLowering::getStackAddress(unsigned StackWidth,
                                         unsigned ElemIdx,
                                         unsigned &Channel,
                                         unsigned &PtrIncr) const {
  switch (StackWidth) {
  default:
  // Unknown widths are treated as width 1 (fall through).
  case 1:
    // Width 1: every element lives in channel 0 of its own register, so
    // advance the pointer for each element after the first.
    Channel = 0;
    if (ElemIdx > 0) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 2:
    // Width 2: elements pair up two-per-register; bump the pointer once,
    // when crossing from the first pair to the second.
    Channel = ElemIdx % 2;
    if (ElemIdx == 2) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 4:
    // Width 4: all four elements fit in one register, one per channel.
    Channel = ElemIdx;
    PtrIncr = 0;
    break;
  }
}
1298
/// Custom lowering for STORE nodes. Handles, in order: stores the generic
/// AMDGPU lowering already knows how to expand; global-address truncating
/// stores (via a masked-OR intrinsic) and dword-aligned global stores; and
/// finally private-address stores, which become REGISTER_STORE nodes using
/// indirect register indexing.
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Ptr = Op.getOperand(2);

  // Let the generic AMDGPU lowering take a first crack at it.
  SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
  if (Result.getNode()) {
    return Result;
  }

  if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
    if (StoreNode->isTruncatingStore()) {
      // Sub-dword global store: emit a STORE_MSKOR (masked OR) that merges
      // the shifted value into the containing dword.
      EVT VT = Value.getValueType();
      assert(VT.bitsLE(MVT::i32));
      EVT MemVT = StoreNode->getMemoryVT();
      SDValue MaskConstant;
      if (MemVT == MVT::i8) {
        MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
      } else {
        assert(MemVT == MVT::i16);
        MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
      }
      // Split the byte address into dword address and byte-within-dword.
      SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
                                      DAG.getConstant(2, DL, MVT::i32));
      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
                                      DAG.getConstant(0x00000003, DL, VT));
      SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
      // Bit shift = byte index * 8.
      SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
                                  DAG.getConstant(3, DL, VT));
      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
      // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
      // vector instead.
      SDValue Src[4] = {
        ShiftedValue,
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(0, DL, MVT::i32),
        Mask
      };
      SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src);
      SDValue Args[3] = { Chain, Input, DWordAddr };
      return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
                                     Op->getVTList(), Args, MemVT,
                                     StoreNode->getMemOperand());
    } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
               Value.getValueType().bitsGE(MVT::i32)) {
      // Convert pointer from byte address to dword address.
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
                        DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
                                    Ptr, DAG.getConstant(2, DL, MVT::i32)));

      if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
        llvm_unreachable("Truncated and indexed stores not supported yet");
      } else {
        Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
      }
      return Chain;
    }
  }

  EVT ValueVT = Value.getValueType();

  // Only private-address stores are handled below.
  if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // NOTE(review): this repeats the AMDGPUTargetLowering::LowerSTORE call from
  // the top of the function with identical arguments; it looks redundant --
  // confirm before removing.
  SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
  if (Ret.getNode()) {
    return Ret;
  }
  // Lowering for indirect addressing

  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL =
      static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  // Turn the byte address into a register index (see stackPtrToRegIndex).
  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (ValueVT.isVector()) {
    // Store each vector element with its own REGISTER_STORE, joined by a
    // TokenFactor. getStackAddress yields the channel and the cumulative
    // pointer increment for each element.
    unsigned NumElemVT = ValueVT.getVectorNumElements();
    EVT ElemVT = ValueVT.getVectorElementType();
    SmallVector<SDValue, 4> Stores(NumElemVT);

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, DL, MVT::i32));
      SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
                                 Value, DAG.getConstant(i, DL, MVT::i32));

      Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
                              Chain, Elem, Ptr,
                              DAG.getTargetConstant(Channel, DL, MVT::i32));
    }
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
  } else {
    // Scalar store: widen sub-dword values and store into channel 0.
    if (ValueVT == MVT::i8) {
      Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
    }
    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
                        DAG.getTargetConstant(0, DL, MVT::i32)); // Channel
  }

  return Chain;
}
1411
Tom Stellard365366f2013-01-23 02:09:06 +00001412// return (512 + (kc_bank << 12)
1413static int
1414ConstantAddressBlock(unsigned AddressSpace) {
1415 switch (AddressSpace) {
1416 case AMDGPUAS::CONSTANT_BUFFER_0:
1417 return 512;
1418 case AMDGPUAS::CONSTANT_BUFFER_1:
1419 return 512 + 4096;
1420 case AMDGPUAS::CONSTANT_BUFFER_2:
1421 return 512 + 4096 * 2;
1422 case AMDGPUAS::CONSTANT_BUFFER_3:
1423 return 512 + 4096 * 3;
1424 case AMDGPUAS::CONSTANT_BUFFER_4:
1425 return 512 + 4096 * 4;
1426 case AMDGPUAS::CONSTANT_BUFFER_5:
1427 return 512 + 4096 * 5;
1428 case AMDGPUAS::CONSTANT_BUFFER_6:
1429 return 512 + 4096 * 6;
1430 case AMDGPUAS::CONSTANT_BUFFER_7:
1431 return 512 + 4096 * 7;
1432 case AMDGPUAS::CONSTANT_BUFFER_8:
1433 return 512 + 4096 * 8;
1434 case AMDGPUAS::CONSTANT_BUFFER_9:
1435 return 512 + 4096 * 9;
1436 case AMDGPUAS::CONSTANT_BUFFER_10:
1437 return 512 + 4096 * 10;
1438 case AMDGPUAS::CONSTANT_BUFFER_11:
1439 return 512 + 4096 * 11;
1440 case AMDGPUAS::CONSTANT_BUFFER_12:
1441 return 512 + 4096 * 12;
1442 case AMDGPUAS::CONSTANT_BUFFER_13:
1443 return 512 + 4096 * 13;
1444 case AMDGPUAS::CONSTANT_BUFFER_14:
1445 return 512 + 4096 * 14;
1446 case AMDGPUAS::CONSTANT_BUFFER_15:
1447 return 512 + 4096 * 15;
1448 default:
1449 return -1;
1450 }
1451}
1452
/// Custom lowering for ISD::LOAD on R600.
///
/// Handled in order: loads the generic AMDGPU lowering already knows how to
/// handle; constant-address-space loads of global variables (lowered to
/// REGISTER_LOAD); vector loads from local memory (scalarized); loads from
/// the constant buffers (lowered to CONST_ADDRESS); SEXT loads (expanded to
/// EXTLOAD + SIGN_EXTEND_INREG); and finally private (indirect) loads,
/// lowered to REGISTER_LOAD with stack-register indexing.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  // Give the generic AMDGPU lowering the first chance at this load.
  SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
  if (Ret.getNode()) {
    SDValue Ops[2] = {
      Ret,
      Chain
    };
    return DAG.getMergeValues(Ops, DL);
  }

  // Lower loads constant address space global variable loads
  if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
      isa<GlobalVariable>(GetUnderlyingObject(
          LoadNode->getMemOperand()->getValue(), *getDataLayout()))) {

    // This Ptr shadows the function-level Ptr; it is local to this branch.
    SDValue Ptr = DAG.getZExtOrTrunc(LoadNode->getBasePtr(), DL,
        getPointerTy(AMDGPUAS::PRIVATE_ADDRESS));
    // Convert the byte address to a dword address (divide by 4).
    Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
        DAG.getConstant(2, DL, MVT::i32));
    return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op->getVTList(),
                       LoadNode->getChain(), Ptr,
                       DAG.getTargetConstant(0, DL, MVT::i32),
                       Op.getOperand(2));
  }

  // Vector loads from local memory are split into scalar loads.
  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      ScalarizeVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Loads from one of the sixteen constant buffers (non-extending or
  // zero-extending only; SEXT is handled further below).
  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
        isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT,
                           makeArrayRef(Slots, NumElements));
    } else {
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
                      DAG.getConstant(4, DL, MVT::i32)),
                      DAG.getConstant(LoadNode->getAddressSpace() -
                                      AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
          );
    }

    // Scalar loads only need element 0 of the v4i32 result.
    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, DL, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                     LoadNode->getPointerInfo(), MemVT,
                                     LoadNode->isVolatile(),
                                     LoadNode->isNonTemporal(),
                                     LoadNode->isInvariant(),
                                     LoadNode->getAlignment());
    // Re-create the sign extension explicitly on the any-extended value.
    SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
                              DAG.getValueType(MemVT));

    SDValue MergedValues[2] = { Res, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Everything below is only for private (stack) loads.
  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL =
      static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    // Load each element through a channel/pointer pair derived from the
    // stack width, then pad the remaining lanes with undef.
    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, DL, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, DL, MVT::i32),
                             Op.getOperand(2));
    }
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, DL, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2] = {
    LoweredLoad,
    Chain
  };

  return DAG.getMergeValues(Ops, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001615
Matt Arsenault1d555c42014-06-23 18:00:55 +00001616SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1617 SDValue Chain = Op.getOperand(0);
1618 SDValue Cond = Op.getOperand(1);
1619 SDValue Jump = Op.getOperand(2);
1620
1621 return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1622 Chain, Jump, Cond);
1623}
1624
Tom Stellard75aadc22012-12-11 21:25:42 +00001625/// XXX Only kernel functions are supported, so we can assume for now that
1626/// every function is a kernel function, but in the future we should use
1627/// separate calling conventions for kernel and non-kernel functions.
1628SDValue R600TargetLowering::LowerFormalArguments(
1629 SDValue Chain,
1630 CallingConv::ID CallConv,
1631 bool isVarArg,
1632 const SmallVectorImpl<ISD::InputArg> &Ins,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001633 SDLoc DL, SelectionDAG &DAG,
Tom Stellard75aadc22012-12-11 21:25:42 +00001634 SmallVectorImpl<SDValue> &InVals) const {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001635 SmallVector<CCValAssign, 16> ArgLocs;
Eric Christopherb5217502014-08-06 18:45:26 +00001636 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1637 *DAG.getContext());
Vincent Lejeunef143af32013-11-11 22:10:24 +00001638 MachineFunction &MF = DAG.getMachineFunction();
Jan Veselye5121f32014-10-14 20:05:26 +00001639 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellardacfeebf2013-07-23 01:48:05 +00001640
Tom Stellardaf775432013-10-23 00:44:32 +00001641 SmallVector<ISD::InputArg, 8> LocalIns;
1642
Matt Arsenault209a7b92014-04-18 07:40:20 +00001643 getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);
Tom Stellardaf775432013-10-23 00:44:32 +00001644
1645 AnalyzeFormalArguments(CCInfo, LocalIns);
Tom Stellardacfeebf2013-07-23 01:48:05 +00001646
Tom Stellard1e803092013-07-23 01:48:18 +00001647 for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001648 CCValAssign &VA = ArgLocs[i];
Matt Arsenault74ef2772014-08-13 18:14:11 +00001649 const ISD::InputArg &In = Ins[i];
1650 EVT VT = In.VT;
1651 EVT MemVT = VA.getLocVT();
1652 if (!VT.isVector() && MemVT.isVector()) {
1653 // Get load source type if scalarized.
1654 MemVT = MemVT.getVectorElementType();
1655 }
Tom Stellard78e01292013-07-23 01:47:58 +00001656
Jan Veselye5121f32014-10-14 20:05:26 +00001657 if (MFI->getShaderType() != ShaderType::COMPUTE) {
Vincent Lejeunef143af32013-11-11 22:10:24 +00001658 unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
1659 SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
1660 InVals.push_back(Register);
1661 continue;
1662 }
1663
Tom Stellard75aadc22012-12-11 21:25:42 +00001664 PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Matt Arsenault74ef2772014-08-13 18:14:11 +00001665 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellardacfeebf2013-07-23 01:48:05 +00001666
Matt Arsenaultfae02982014-03-17 18:58:11 +00001667 // i64 isn't a legal type, so the register type used ends up as i32, which
1668 // isn't expected here. It attempts to create this sextload, but it ends up
1669 // being invalid. Somehow this seems to work with i64 arguments, but breaks
1670 // for <1 x i64>.
1671
Tom Stellardacfeebf2013-07-23 01:48:05 +00001672 // The first 36 bytes of the input buffer contains information about
1673 // thread group and global sizes.
Matt Arsenault74ef2772014-08-13 18:14:11 +00001674 ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
1675 if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
1676 // FIXME: This should really check the extload type, but the handling of
1677 // extload vector parameters seems to be broken.
Matt Arsenaulte1f030c2014-04-11 20:59:54 +00001678
Matt Arsenault74ef2772014-08-13 18:14:11 +00001679 // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
1680 Ext = ISD::SEXTLOAD;
1681 }
1682
1683 // Compute the offset from the value.
1684 // XXX - I think PartOffset should give you this, but it seems to give the
1685 // size of the register which isn't useful.
1686
Andrew Trick05938a52015-02-16 18:10:47 +00001687 unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
Matt Arsenault74ef2772014-08-13 18:14:11 +00001688 unsigned PartOffset = VA.getLocMemOffset();
Jan Veselye5121f32014-10-14 20:05:26 +00001689 unsigned Offset = 36 + VA.getLocMemOffset();
Matt Arsenault74ef2772014-08-13 18:14:11 +00001690
1691 MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
1692 SDValue Arg = DAG.getLoad(ISD::UNINDEXED, Ext, VT, DL, Chain,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001693 DAG.getConstant(Offset, DL, MVT::i32),
Matt Arsenault74ef2772014-08-13 18:14:11 +00001694 DAG.getUNDEF(MVT::i32),
1695 PtrInfo,
1696 MemVT, false, true, true, 4);
Matt Arsenault209a7b92014-04-18 07:40:20 +00001697
1698 // 4 is the preferred alignment for the CONSTANT memory space.
Tom Stellard75aadc22012-12-11 21:25:42 +00001699 InVals.push_back(Arg);
Jan Veselye5121f32014-10-14 20:05:26 +00001700 MFI->ABIArgOffset = Offset + MemVT.getStoreSize();
Tom Stellard75aadc22012-12-11 21:25:42 +00001701 }
1702 return Chain;
1703}
1704
Matt Arsenault758659232013-05-18 00:21:46 +00001705EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001706 if (!VT.isVector())
1707 return MVT::i32;
Tom Stellard75aadc22012-12-11 21:25:42 +00001708 return VT.changeVectorElementTypeToInteger();
1709}
1710
Matt Arsenault209a7b92014-04-18 07:40:20 +00001711static SDValue CompactSwizzlableVector(
1712 SelectionDAG &DAG, SDValue VectorEntry,
1713 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001714 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1715 assert(RemapSwizzle.empty());
1716 SDValue NewBldVec[4] = {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001717 VectorEntry.getOperand(0),
1718 VectorEntry.getOperand(1),
1719 VectorEntry.getOperand(2),
1720 VectorEntry.getOperand(3)
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001721 };
1722
1723 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeunefa58a5f2013-10-13 17:56:10 +00001724 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1725 // We mask write here to teach later passes that the ith element of this
1726 // vector is undef. Thus we can use it to reduce 128 bits reg usage,
1727 // break false dependencies and additionnaly make assembly easier to read.
1728 RemapSwizzle[i] = 7; // SEL_MASK_WRITE
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001729 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1730 if (C->isZero()) {
1731 RemapSwizzle[i] = 4; // SEL_0
1732 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1733 } else if (C->isExactlyValue(1.0)) {
1734 RemapSwizzle[i] = 5; // SEL_1
1735 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1736 }
1737 }
1738
1739 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1740 continue;
1741 for (unsigned j = 0; j < i; j++) {
1742 if (NewBldVec[i] == NewBldVec[j]) {
1743 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1744 RemapSwizzle[i] = j;
1745 break;
1746 }
1747 }
1748 }
1749
1750 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001751 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001752}
1753
Benjamin Kramer193960c2013-06-11 13:32:25 +00001754static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1755 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001756 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1757 assert(RemapSwizzle.empty());
1758 SDValue NewBldVec[4] = {
1759 VectorEntry.getOperand(0),
1760 VectorEntry.getOperand(1),
1761 VectorEntry.getOperand(2),
1762 VectorEntry.getOperand(3)
1763 };
1764 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001765 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001766 RemapSwizzle[i] = i;
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001767 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1768 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1769 ->getZExtValue();
1770 if (i == Idx)
1771 isUnmovable[Idx] = true;
1772 }
1773 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001774
1775 for (unsigned i = 0; i < 4; i++) {
1776 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1777 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1778 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001779 if (isUnmovable[Idx])
1780 continue;
1781 // Swap i and Idx
1782 std::swap(NewBldVec[Idx], NewBldVec[i]);
1783 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1784 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001785 }
1786 }
1787
1788 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001789 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001790}
1791
1792
1793SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001794 SDValue Swz[4], SelectionDAG &DAG,
1795 SDLoc DL) const {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001796 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1797 // Old -> New swizzle values
1798 DenseMap<unsigned, unsigned> SwizzleRemap;
1799
1800 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1801 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001802 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001803 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001804 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001805 }
1806
1807 SwizzleRemap.clear();
1808 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1809 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001810 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001811 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001812 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001813 }
1814
1815 return BuildVector;
1816}
1817
1818
Tom Stellard75aadc22012-12-11 21:25:42 +00001819//===----------------------------------------------------------------------===//
1820// Custom DAG Optimizations
1821//===----------------------------------------------------------------------===//
1822
/// R600 target DAG combines: fold fp_round(uint_to_fp f64), turn
/// fp_to_sint(fneg(select_cc 1.0/0.0)) into an integer select_cc (lowerable
/// to SET*_DX10), rebuild INSERT/EXTRACT on BUILD_VECTORs, simplify nested
/// select_cc, and run swizzle optimization on EXPORT / TEXTURE_FETCH nodes.
/// Anything unhandled falls back to the generic AMDGPU combines.
SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
  case ISD::FP_ROUND: {
      SDValue Arg = N->getOperand(0);
      if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
        return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
                           Arg.getOperand(0));
      }
      break;
    }

  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
  // (i32 select_cc f32, f32, -1, 0 cc)
  //
  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
  // this to one of the SET*_DX10 instructions.
  case ISD::FP_TO_SINT: {
    SDValue FNeg = N->getOperand(0);
    if (FNeg.getOpcode() != ISD::FNEG) {
      return SDValue();
    }
    SDValue SelectCC = FNeg.getOperand(0);
    if (SelectCC.getOpcode() != ISD::SELECT_CC ||
        SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
        SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
        !isHWTrueValue(SelectCC.getOperand(2)) ||
        !isHWFalseValue(SelectCC.getOperand(3))) {
      return SDValue();
    }

    SDLoc dl(N);
    return DAG.getNode(ISD::SELECT_CC, dl, N->getValueType(0),
                           SelectCC.getOperand(0), // LHS
                           SelectCC.getOperand(1), // RHS
                           DAG.getConstant(-1, dl, MVT::i32), // True
                           DAG.getConstant(0, dl, MVT::i32), // False
                           SelectCC.getOperand(4)); // CC

    break;
  }

  // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
  // => build_vector elt0, ... , NewEltIdx, ... , eltN
  case ISD::INSERT_VECTOR_ELT: {
    SDValue InVec = N->getOperand(0);
    SDValue InVal = N->getOperand(1);
    SDValue EltNo = N->getOperand(2);
    SDLoc dl(N);

    // If the inserted element is an UNDEF, just use the input vector.
    if (InVal.getOpcode() == ISD::UNDEF)
      return InVec;

    EVT VT = InVec.getValueType();

    // If we can't generate a legal BUILD_VECTOR, exit
    if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
      return SDValue();

    // Check that we know which element is being inserted
    if (!isa<ConstantSDNode>(EltNo))
      return SDValue();
    unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

    // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
    // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
    // vector elements.
    SmallVector<SDValue, 8> Ops;
    if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
      Ops.append(InVec.getNode()->op_begin(),
                 InVec.getNode()->op_end());
    } else if (InVec.getOpcode() == ISD::UNDEF) {
      unsigned NElts = VT.getVectorNumElements();
      Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
    } else {
      return SDValue();
    }

    // Insert the element
    if (Elt < Ops.size()) {
      // All the operands of BUILD_VECTOR must have the same type;
      // we enforce that here.
      EVT OpVT = Ops[0].getValueType();
      if (InVal.getValueType() != OpVT)
        InVal = OpVT.bitsGT(InVal.getValueType()) ?
          DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
          DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
      Ops[Elt] = InVal;
    }

    // Return the new vector
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
  }

  // Extract_vec (Build_vector) generated by custom lowering
  // also needs to be customly combined
  case ISD::EXTRACT_VECTOR_ELT: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return Arg->getOperand(Element);
      }
    }
    if (Arg.getOpcode() == ISD::BITCAST &&
        Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
            Arg->getOperand(0).getOperand(Element));
      }
    }
  }
  // NOTE(review): there is no break above, so EXTRACT_VECTOR_ELT falls
  // through into the SELECT_CC case. It looks benign today (the SELECT_CC
  // code first retries the generic combine, then bails out for operands
  // that are not SELECT_CC), but confirm the fallthrough is intentional.

  case ISD::SELECT_CC: {
    // Try common optimizations
    SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
    if (Ret.getNode())
      return Ret;

    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
    // selectcc x, y, a, b, inv(cc)
    //
    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
    // selectcc x, y, a, b, cc
    SDValue LHS = N->getOperand(0);
    if (LHS.getOpcode() != ISD::SELECT_CC) {
      return SDValue();
    }

    SDValue RHS = N->getOperand(1);
    SDValue True = N->getOperand(2);
    SDValue False = N->getOperand(3);
    ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();

    // The pattern only matches when the inner select produces exactly the
    // outer's true/false values and the outer compares against False.
    if (LHS.getOperand(2).getNode() != True.getNode() ||
        LHS.getOperand(3).getNode() != False.getNode() ||
        RHS.getNode() != False.getNode()) {
      return SDValue();
    }

    switch (NCC) {
    default: return SDValue();
    case ISD::SETNE: return LHS;
    case ISD::SETEQ: {
      ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
      LHSCC = ISD::getSetCCInverse(LHSCC,
                                  LHS.getOperand(0).getValueType().isInteger());
      // Only rewrite if the inverted condition is (or will become) legal.
      if (DCI.isBeforeLegalizeOps() ||
          isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
        return DAG.getSelectCC(SDLoc(N),
                               LHS.getOperand(0),
                               LHS.getOperand(1),
                               LHS.getOperand(2),
                               LHS.getOperand(3),
                               LHSCC);
      break;
    }
    }
    return SDValue();
  }

  case AMDGPUISD::EXPORT: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    SDValue NewArgs[8] = {
      N->getOperand(0), // Chain
      SDValue(),
      N->getOperand(2), // ArrayBase
      N->getOperand(3), // Type
      N->getOperand(4), // SWZ_X
      N->getOperand(5), // SWZ_Y
      N->getOperand(6), // SWZ_Z
      N->getOperand(7) // SWZ_W
    };
    SDLoc DL(N);
    // OptimizeSwizzle fills NewArgs[1] and rewrites SWZ_X..SWZ_W in place.
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
    return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
  }
  case AMDGPUISD::TEXTURE_FETCH: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    SDValue NewArgs[19] = {
      N->getOperand(0),
      N->getOperand(1),
      N->getOperand(2),
      N->getOperand(3),
      N->getOperand(4),
      N->getOperand(5),
      N->getOperand(6),
      N->getOperand(7),
      N->getOperand(8),
      N->getOperand(9),
      N->getOperand(10),
      N->getOperand(11),
      N->getOperand(12),
      N->getOperand(13),
      N->getOperand(14),
      N->getOperand(15),
      N->getOperand(16),
      N->getOperand(17),
      N->getOperand(18),
    };
    SDLoc DL(N);
    // Operands 2..5 hold the texture swizzle; rewrite them in place.
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
    return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
  }
  }

  return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002043
2044static bool
2045FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002046 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Eric Christopherfc6de422014-08-05 02:39:49 +00002047 const R600InstrInfo *TII =
2048 static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002049 if (!Src.isMachineOpcode())
2050 return false;
2051 switch (Src.getMachineOpcode()) {
2052 case AMDGPU::FNEG_R600:
2053 if (!Neg.getNode())
2054 return false;
2055 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002056 Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002057 return true;
2058 case AMDGPU::FABS_R600:
2059 if (!Abs.getNode())
2060 return false;
2061 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002062 Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002063 return true;
2064 case AMDGPU::CONST_COPY: {
2065 unsigned Opcode = ParentNode->getMachineOpcode();
2066 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2067
2068 if (!Sel.getNode())
2069 return false;
2070
2071 SDValue CstOffset = Src.getOperand(0);
2072 if (ParentNode->getValueType(0).isVector())
2073 return false;
2074
2075 // Gather constants values
2076 int SrcIndices[] = {
2077 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2078 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2079 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
2080 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2081 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2082 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2083 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2084 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2085 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2086 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2087 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
2088 };
2089 std::vector<unsigned> Consts;
Matt Arsenault4d64f962014-05-12 19:23:21 +00002090 for (int OtherSrcIdx : SrcIndices) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002091 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2092 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2093 continue;
2094 if (HasDst) {
2095 OtherSrcIdx--;
2096 OtherSelIdx--;
2097 }
2098 if (RegisterSDNode *Reg =
2099 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2100 if (Reg->getReg() == AMDGPU::ALU_CONST) {
Matt Arsenaultb3ee3882014-05-12 19:26:38 +00002101 ConstantSDNode *Cst
2102 = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002103 Consts.push_back(Cst->getZExtValue());
2104 }
2105 }
2106 }
2107
Matt Arsenault37c12d72014-05-12 20:42:57 +00002108 ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002109 Consts.push_back(Cst->getZExtValue());
2110 if (!TII->fitsConstReadLimitations(Consts)) {
2111 return false;
2112 }
2113
2114 Sel = CstOffset;
2115 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
2116 return true;
2117 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002118 case AMDGPU::MOV_IMM_I32:
2119 case AMDGPU::MOV_IMM_F32: {
2120 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
2121 uint64_t ImmValue = 0;
2122
2123
2124 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
2125 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
2126 float FloatValue = FPC->getValueAPF().convertToFloat();
2127 if (FloatValue == 0.0) {
2128 ImmReg = AMDGPU::ZERO;
2129 } else if (FloatValue == 0.5) {
2130 ImmReg = AMDGPU::HALF;
2131 } else if (FloatValue == 1.0) {
2132 ImmReg = AMDGPU::ONE;
2133 } else {
2134 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2135 }
2136 } else {
2137 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
2138 uint64_t Value = C->getZExtValue();
2139 if (Value == 0) {
2140 ImmReg = AMDGPU::ZERO;
2141 } else if (Value == 1) {
2142 ImmReg = AMDGPU::ONE_INT;
2143 } else {
2144 ImmValue = Value;
2145 }
2146 }
2147
2148 // Check that we aren't already using an immediate.
2149 // XXX: It's possible for an instruction to have more than one
2150 // immediate operand, but this is not supported yet.
2151 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
2152 if (!Imm.getNode())
2153 return false;
2154 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
2155 assert(C);
2156 if (C->getZExtValue())
2157 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002158 Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002159 }
2160 Src = DAG.getRegister(ImmReg, MVT::i32);
2161 return true;
2162 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002163 default:
2164 return false;
2165 }
2166}
2167
2168
2169/// \brief Fold the instructions after selecting them
2170SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2171 SelectionDAG &DAG) const {
Eric Christopherfc6de422014-08-05 02:39:49 +00002172 const R600InstrInfo *TII =
2173 static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002174 if (!Node->isMachineOpcode())
2175 return Node;
2176 unsigned Opcode = Node->getMachineOpcode();
2177 SDValue FakeOp;
2178
Benjamin Kramer6cd780f2015-02-17 15:29:18 +00002179 std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002180
2181 if (Opcode == AMDGPU::DOT_4) {
2182 int OperandIdx[] = {
2183 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2184 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2185 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2186 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2187 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2188 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2189 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2190 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00002191 };
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002192 int NegIdx[] = {
2193 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
2194 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
2195 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
2196 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
2197 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
2198 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
2199 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
2200 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
2201 };
2202 int AbsIdx[] = {
2203 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
2204 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
2205 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
2206 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
2207 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
2208 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
2209 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
2210 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
2211 };
2212 for (unsigned i = 0; i < 8; i++) {
2213 if (OperandIdx[i] < 0)
2214 return Node;
2215 SDValue &Src = Ops[OperandIdx[i] - 1];
2216 SDValue &Neg = Ops[NegIdx[i] - 1];
2217 SDValue &Abs = Ops[AbsIdx[i] - 1];
2218 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2219 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2220 if (HasDst)
2221 SelIdx--;
2222 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002223 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2224 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2225 }
2226 } else if (Opcode == AMDGPU::REG_SEQUENCE) {
2227 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2228 SDValue &Src = Ops[i];
2229 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002230 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2231 }
Vincent Lejeune0167a312013-09-12 23:45:00 +00002232 } else if (Opcode == AMDGPU::CLAMP_R600) {
2233 SDValue Src = Node->getOperand(0);
2234 if (!Src.isMachineOpcode() ||
2235 !TII->hasInstrModifiers(Src.getMachineOpcode()))
2236 return Node;
2237 int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
2238 AMDGPU::OpName::clamp);
2239 if (ClampIdx < 0)
2240 return Node;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002241 SDLoc DL(Node);
Benjamin Kramer6cd780f2015-02-17 15:29:18 +00002242 std::vector<SDValue> Ops(Src->op_begin(), Src->op_end());
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002243 Ops[ClampIdx - 1] = DAG.getTargetConstant(1, DL, MVT::i32);
2244 return DAG.getMachineNode(Src.getMachineOpcode(), DL,
2245 Node->getVTList(), Ops);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002246 } else {
2247 if (!TII->hasInstrModifiers(Opcode))
2248 return Node;
2249 int OperandIdx[] = {
2250 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2251 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2252 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
2253 };
2254 int NegIdx[] = {
2255 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
2256 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
2257 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
2258 };
2259 int AbsIdx[] = {
2260 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
2261 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
2262 -1
2263 };
2264 for (unsigned i = 0; i < 3; i++) {
2265 if (OperandIdx[i] < 0)
2266 return Node;
2267 SDValue &Src = Ops[OperandIdx[i] - 1];
2268 SDValue &Neg = Ops[NegIdx[i] - 1];
2269 SDValue FakeAbs;
2270 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2271 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2272 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002273 int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
2274 if (HasDst) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002275 SelIdx--;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002276 ImmIdx--;
2277 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002278 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002279 SDValue &Imm = Ops[ImmIdx];
2280 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002281 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2282 }
2283 }
2284
2285 return Node;
2286}