blob: d6b6e1976570210c504895f539d116648fea94d1 [file] [log] [blame]
Tom Stellard75aadc22012-12-11 21:25:42 +00001//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Custom DAG lowering for R600
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000016#include "AMDGPUFrameLowering.h"
Matt Arsenaultc791f392014-06-23 18:00:31 +000017#include "AMDGPUIntrinsicInfo.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000018#include "AMDGPUSubtarget.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000019#include "R600Defines.h"
20#include "R600InstrInfo.h"
21#include "R600MachineFunctionInfo.h"
Tom Stellard067c8152014-07-21 14:01:14 +000022#include "llvm/Analysis/ValueTracking.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000023#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000024#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000025#include "llvm/CodeGen/MachineInstrBuilder.h"
26#include "llvm/CodeGen/MachineRegisterInfo.h"
27#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000028#include "llvm/IR/Argument.h"
29#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000030
31using namespace llvm;
32
Eric Christopher7792e322015-01-30 23:24:40 +000033R600TargetLowering::R600TargetLowering(TargetMachine &TM,
34 const AMDGPUSubtarget &STI)
35 : AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000036 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
37 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
38 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
39 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000040 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
41 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
42
Eric Christopher23a3a7c2015-02-26 00:00:24 +000043 computeRegisterProperties(STI.getRegisterInfo());
Tom Stellard75aadc22012-12-11 21:25:42 +000044
Tom Stellard0351ea22013-09-28 02:50:50 +000045 // Set condition code actions
46 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
47 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000048 setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000049 setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000050 setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
51 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000052 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
53 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
54 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
55 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000056 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
57 setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
58
59 setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
60 setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
61 setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
62 setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
63
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000064 setOperationAction(ISD::FCOS, MVT::f32, Custom);
65 setOperationAction(ISD::FSIN, MVT::f32, Custom);
66
Tom Stellard75aadc22012-12-11 21:25:42 +000067 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000068 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000069
Tom Stellard492ebea2013-03-08 15:37:07 +000070 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
71 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Matt Arsenault1d555c42014-06-23 18:00:55 +000072 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000073
74 setOperationAction(ISD::FSUB, MVT::f32, Expand);
75
76 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
77 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
78 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000079
Tom Stellard75aadc22012-12-11 21:25:42 +000080 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
81 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
82
Tom Stellarde8f9f282013-03-08 15:37:05 +000083 setOperationAction(ISD::SETCC, MVT::i32, Expand);
84 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000085 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
Jan Vesely2cb62ce2014-07-10 22:40:21 +000086 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
87 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000088
Tom Stellard53f2f902013-09-05 18:38:03 +000089 setOperationAction(ISD::SELECT, MVT::i32, Expand);
90 setOperationAction(ISD::SELECT, MVT::f32, Expand);
91 setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
Tom Stellard53f2f902013-09-05 18:38:03 +000092 setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000093
Jan Vesely808fff52015-04-30 17:15:56 +000094 // ADD, SUB overflow.
95 // TODO: turn these into Legal?
96 if (Subtarget->hasCARRY())
97 setOperationAction(ISD::UADDO, MVT::i32, Custom);
98
99 if (Subtarget->hasBORROW())
100 setOperationAction(ISD::USUBO, MVT::i32, Custom);
101
Matt Arsenault4e466652014-04-16 01:41:30 +0000102 // Expand sign extension of vectors
103 if (!Subtarget->hasBFE())
104 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
105
106 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
107 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);
108
109 if (!Subtarget->hasBFE())
110 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
111 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
112 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);
113
114 if (!Subtarget->hasBFE())
115 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
116 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
117 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);
118
119 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
120 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
121 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);
122
123 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
124
125
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000126 // Legalize loads and stores to the private address space.
127 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000128 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000129 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000130
131 // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
132 // spaces, so it is custom lowered to handle those where it isn't.
Ahmed Bougacha2b6917b2015-01-08 00:51:32 +0000133 for (MVT VT : MVT::integer_valuetypes()) {
134 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
135 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
136 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);
Matt Arsenault2a495972014-11-23 02:57:54 +0000137
Ahmed Bougacha2b6917b2015-01-08 00:51:32 +0000138 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
139 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
140 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);
Matt Arsenault2a495972014-11-23 02:57:54 +0000141
Ahmed Bougacha2b6917b2015-01-08 00:51:32 +0000142 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
143 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
144 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
145 }
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000146
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000147 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000148 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000149 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000150 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
Tom Stellardd3ee8c12013-08-16 01:12:06 +0000151 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
152 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000153
Tom Stellard365366f2013-01-23 02:09:06 +0000154 setOperationAction(ISD::LOAD, MVT::i32, Custom);
155 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000156 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
157
Tom Stellard880a80a2014-06-17 16:53:14 +0000158 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
159 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
160 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
161 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
162
163 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
164 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
165 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
166 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
167
Tom Stellard75aadc22012-12-11 21:25:42 +0000168 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +0000169 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +0000170 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +0000171 setTargetDAGCombine(ISD::SELECT_CC);
Quentin Colombete2e05482013-07-30 00:27:16 +0000172 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000173
Jan Vesely25f36272014-06-18 12:27:13 +0000174 // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
175 // to be Legal/Custom in order to avoid library calls.
176 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
Jan Vesely900ff2e2014-06-18 12:27:15 +0000177 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
Jan Veselyecf51332014-06-18 12:27:17 +0000178 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
Jan Vesely25f36272014-06-18 12:27:13 +0000179
Michel Danzer49812b52013-07-10 16:37:07 +0000180 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
181
Matt Arsenaultc4d3d3a2014-06-23 18:00:49 +0000182 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
183 for (MVT VT : ScalarIntVTs) {
184 setOperationAction(ISD::ADDC, VT, Expand);
185 setOperationAction(ISD::SUBC, VT, Expand);
186 setOperationAction(ISD::ADDE, VT, Expand);
187 setOperationAction(ISD::SUBE, VT, Expand);
188 }
189
Tom Stellardfc455472013-08-12 22:33:21 +0000190 setSchedulingPreference(Sched::Source);
Tom Stellard75aadc22012-12-11 21:25:42 +0000191}
192
Tom Stellardc0f0fba2015-10-01 17:51:29 +0000193static inline bool isEOP(MachineBasicBlock::iterator I) {
194 return std::next(I)->getOpcode() == AMDGPU::RETURN;
195}
196
/// Expand pseudo instructions that were marked usesCustomInserter into real
/// R600 machine instructions: modifier-flag moves (clamp/abs/neg), immediate
/// moves, constant copies, RAT stores, texture-derivative sampling, branches,
/// exports, and RETURN live-out bookkeeping.
///
/// \param MI the pseudo instruction to expand (erased on the common path).
/// \param BB the block containing \p MI.
/// \return the block in which subsequent instructions should be inserted.
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(Subtarget->getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // Replace LDS_*_RET instruction that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI->getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
      // LDS_1A2D support and remove this special case.
      // Keep the RET form when the result is used (or for CMPST, which has
      // no NORET mapping): return without erasing MI.
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()) ||
          MI->getOpcode() == AMDGPU::LDS_CMPST_RET)
        return BB;

      // Rebuild as the NORET opcode, copying all operands except the dead
      // destination at index 0.
      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      // Not an LDS pseudo we know: defer to the generic AMDGPU inserter.
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  case AMDGPU::CLAMP_R600: {
    // Lower to a plain MOV carrying the CLAMP output-modifier flag.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                  AMDGPU::MOV,
                                                  MI->getOperand(0).getReg(),
                                                  MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    // Lower to a MOV whose source carries the ABS input-modifier flag.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    // Lower to a MOV whose source carries the NEG input-modifier flag.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    // Mark the defining instruction of the masked register with the MASK
    // flag instead of emitting anything here.
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    // Materialize the float immediate via its raw 32-bit pattern.
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    // MOV from the ALU constant file; the constant's address goes in the
    // src0_sel immediate operand.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
        MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(isEOP(I)); // Set End of program bit
    break;
  }
  case AMDGPU::RAT_STORE_TYPED_eg: {
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addImm(isEOP(I)); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    // Textured sample with explicit derivatives: emit SET_GRADIENTS_H/V to
    // load the horizontal/vertical derivatives into temporaries, then the
    // gradient-sampling TEX, with source swizzle (SrcX..SrcW) and coordinate
    // type bits (CTX..CTW) adjusted per texture target.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    // T0/T1 are implicit uses so the gradient setup cannot be scheduled or
    // dead-code-eliminated away from the sample.
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    // Same expansion as TXD above, but ends in the shadow-comparison
    // gradient sample (TEX_SAMPLE_C_G).
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    // Unconditional branch: plain JUMP.
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    // Float conditional branch: PRED_X sets PREDICATE_BIT from a
    // compare-against-zero, then JUMP_COND consumes (and kills) it.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    // Integer variant of the conditional branch above.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
            AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO_INT)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
           .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if its not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    // Scan forward for another export of the same type in this block.
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = isEOP(I);
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // CF_INST encoding differs between Evergreen (84) and R600 (40).
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  // Common path: the pseudo has been replaced; remove it.
  MI->eraseFromParent();
  return BB;
}
582
583//===----------------------------------------------------------------------===//
584// Custom DAG Lowering Operations
585//===----------------------------------------------------------------------===//
586
Tom Stellard75aadc22012-12-11 21:25:42 +0000587SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000588 MachineFunction &MF = DAG.getMachineFunction();
589 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard75aadc22012-12-11 21:25:42 +0000590 switch (Op.getOpcode()) {
591 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Tom Stellard880a80a2014-06-17 16:53:14 +0000592 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
593 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
Jan Vesely25f36272014-06-18 12:27:13 +0000594 case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
Jan Veselyecf51332014-06-18 12:27:17 +0000595 case ISD::SRA_PARTS:
Jan Vesely900ff2e2014-06-18 12:27:15 +0000596 case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
Jan Vesely808fff52015-04-30 17:15:56 +0000597 case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
598 case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000599 case ISD::FCOS:
600 case ISD::FSIN: return LowerTrig(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000601 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000602 case ISD::STORE: return LowerSTORE(Op, DAG);
Matt Arsenaultd2c9e082014-07-07 18:34:45 +0000603 case ISD::LOAD: {
604 SDValue Result = LowerLOAD(Op, DAG);
605 assert((!Result.getNode() ||
606 Result.getNode()->getNumValues() == 2) &&
607 "Load should return a value and a chain");
608 return Result;
609 }
610
Matt Arsenault1d555c42014-06-23 18:00:55 +0000611 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000612 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000613 case ISD::INTRINSIC_VOID: {
614 SDValue Chain = Op.getOperand(0);
615 unsigned IntrinsicID =
616 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
617 switch (IntrinsicID) {
618 case AMDGPUIntrinsic::AMDGPU_store_output: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000619 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
620 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000621 MFI->LiveOuts.push_back(Reg);
Andrew Trickef9de2a2013-05-25 02:42:55 +0000622 return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
Tom Stellard75aadc22012-12-11 21:25:42 +0000623 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000624 case AMDGPUIntrinsic::R600_store_swizzle: {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000625 SDLoc DL(Op);
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000626 const SDValue Args[8] = {
627 Chain,
628 Op.getOperand(2), // Export Value
629 Op.getOperand(3), // ArrayBase
630 Op.getOperand(4), // Type
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000631 DAG.getConstant(0, DL, MVT::i32), // SWZ_X
632 DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
633 DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
634 DAG.getConstant(3, DL, MVT::i32) // SWZ_W
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000635 };
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000636 return DAG.getNode(AMDGPUISD::EXPORT, DL, Op.getValueType(), Args);
Tom Stellard75aadc22012-12-11 21:25:42 +0000637 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000638
Tom Stellard75aadc22012-12-11 21:25:42 +0000639 // default for switch(IntrinsicID)
640 default: break;
641 }
642 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
643 break;
644 }
645 case ISD::INTRINSIC_WO_CHAIN: {
646 unsigned IntrinsicID =
647 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
648 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000649 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000650 switch(IntrinsicID) {
651 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeunef143af32013-11-11 22:10:24 +0000652 case AMDGPUIntrinsic::R600_interp_xy:
653 case AMDGPUIntrinsic::R600_interp_zw: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000654 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Tom Stellard41afe6a2013-02-05 17:09:14 +0000655 MachineSDNode *interp;
Vincent Lejeunef143af32013-11-11 22:10:24 +0000656 SDValue RegisterINode = Op.getOperand(2);
657 SDValue RegisterJNode = Op.getOperand(3);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000658
Vincent Lejeunef143af32013-11-11 22:10:24 +0000659 if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
Tom Stellard41afe6a2013-02-05 17:09:14 +0000660 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000661 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000662 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000663 else
664 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000665 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000666 RegisterJNode, RegisterINode);
Vincent Lejeunef143af32013-11-11 22:10:24 +0000667 return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
668 SDValue(interp, 0), SDValue(interp, 1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000669 }
Matt Arsenault59bd3012016-01-22 19:00:09 +0000670 case AMDGPUIntrinsic::r600_tex:
671 case AMDGPUIntrinsic::r600_texc:
672 case AMDGPUIntrinsic::r600_txl:
673 case AMDGPUIntrinsic::r600_txlc:
674 case AMDGPUIntrinsic::r600_txb:
675 case AMDGPUIntrinsic::r600_txbc:
676 case AMDGPUIntrinsic::r600_txf:
677 case AMDGPUIntrinsic::r600_txq:
678 case AMDGPUIntrinsic::r600_ddx:
679 case AMDGPUIntrinsic::r600_ddy:
Vincent Lejeune6df39432013-10-02 16:00:33 +0000680 case AMDGPUIntrinsic::R600_ldptr: {
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000681 unsigned TextureOp;
682 switch (IntrinsicID) {
Matt Arsenault59bd3012016-01-22 19:00:09 +0000683 case AMDGPUIntrinsic::r600_tex:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000684 TextureOp = 0;
685 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000686 case AMDGPUIntrinsic::r600_texc:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000687 TextureOp = 1;
688 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000689 case AMDGPUIntrinsic::r600_txl:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000690 TextureOp = 2;
691 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000692 case AMDGPUIntrinsic::r600_txlc:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000693 TextureOp = 3;
694 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000695 case AMDGPUIntrinsic::r600_txb:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000696 TextureOp = 4;
697 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000698 case AMDGPUIntrinsic::r600_txbc:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000699 TextureOp = 5;
700 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000701 case AMDGPUIntrinsic::r600_txf:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000702 TextureOp = 6;
703 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000704 case AMDGPUIntrinsic::r600_txq:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000705 TextureOp = 7;
706 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000707 case AMDGPUIntrinsic::r600_ddx:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000708 TextureOp = 8;
709 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000710 case AMDGPUIntrinsic::r600_ddy:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000711 TextureOp = 9;
712 break;
Vincent Lejeune6df39432013-10-02 16:00:33 +0000713 case AMDGPUIntrinsic::R600_ldptr:
714 TextureOp = 10;
715 break;
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000716 default:
717 llvm_unreachable("Unknow Texture Operation");
718 }
719
720 SDValue TexArgs[19] = {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000721 DAG.getConstant(TextureOp, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000722 Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000723 DAG.getConstant(0, DL, MVT::i32),
724 DAG.getConstant(1, DL, MVT::i32),
725 DAG.getConstant(2, DL, MVT::i32),
726 DAG.getConstant(3, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000727 Op.getOperand(2),
728 Op.getOperand(3),
729 Op.getOperand(4),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000730 DAG.getConstant(0, DL, MVT::i32),
731 DAG.getConstant(1, DL, MVT::i32),
732 DAG.getConstant(2, DL, MVT::i32),
733 DAG.getConstant(3, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000734 Op.getOperand(5),
735 Op.getOperand(6),
736 Op.getOperand(7),
737 Op.getOperand(8),
738 Op.getOperand(9),
739 Op.getOperand(10)
740 };
Craig Topper48d114b2014-04-26 18:35:24 +0000741 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000742 }
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000743 case AMDGPUIntrinsic::AMDGPU_dp4: {
744 SDValue Args[8] = {
745 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000746 DAG.getConstant(0, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000747 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000748 DAG.getConstant(0, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000749 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000750 DAG.getConstant(1, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000751 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000752 DAG.getConstant(1, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000753 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000754 DAG.getConstant(2, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000755 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000756 DAG.getConstant(2, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000757 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000758 DAG.getConstant(3, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000759 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000760 DAG.getConstant(3, DL, MVT::i32))
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000761 };
Craig Topper48d114b2014-04-26 18:35:24 +0000762 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000763 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000764
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000765 case Intrinsic::r600_read_ngroups_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000766 return LowerImplicitParameter(DAG, VT, DL, 0);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000767 case Intrinsic::r600_read_ngroups_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000768 return LowerImplicitParameter(DAG, VT, DL, 1);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000769 case Intrinsic::r600_read_ngroups_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000770 return LowerImplicitParameter(DAG, VT, DL, 2);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000771 case Intrinsic::r600_read_global_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000772 return LowerImplicitParameter(DAG, VT, DL, 3);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000773 case Intrinsic::r600_read_global_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000774 return LowerImplicitParameter(DAG, VT, DL, 4);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000775 case Intrinsic::r600_read_global_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000776 return LowerImplicitParameter(DAG, VT, DL, 5);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000777 case Intrinsic::r600_read_local_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000778 return LowerImplicitParameter(DAG, VT, DL, 6);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000779 case Intrinsic::r600_read_local_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000780 return LowerImplicitParameter(DAG, VT, DL, 7);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000781 case Intrinsic::r600_read_local_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000782 return LowerImplicitParameter(DAG, VT, DL, 8);
783
Tom Stellarddcb9f092015-07-09 21:20:37 +0000784 case Intrinsic::AMDGPU_read_workdim: {
785 uint32_t ByteOffset = getImplicitParameterOffset(MFI, GRID_DIM);
786 return LowerImplicitParameter(DAG, VT, DL, ByteOffset / 4);
787 }
Jan Veselye5121f32014-10-14 20:05:26 +0000788
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000789 case Intrinsic::r600_read_tgid_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000790 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
791 AMDGPU::T1_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000792 case Intrinsic::r600_read_tgid_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000793 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
794 AMDGPU::T1_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000795 case Intrinsic::r600_read_tgid_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000796 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
797 AMDGPU::T1_Z, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000798 case Intrinsic::r600_read_tidig_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000799 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
800 AMDGPU::T0_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000801 case Intrinsic::r600_read_tidig_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000802 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
803 AMDGPU::T0_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000804 case Intrinsic::r600_read_tidig_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000805 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
806 AMDGPU::T0_Z, VT);
Matt Arsenault257d48d2014-06-24 22:13:39 +0000807 case Intrinsic::AMDGPU_rsq:
808 // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
809 return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000810 }
811 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
812 break;
813 }
814 } // end switch(Op.getOpcode())
815 return SDValue();
816}
817
/// Custom-legalize the illegal result types of \p N and append the
/// replacement values to \p Results. Anything not handled here is delegated
/// to the common AMDGPU implementation.
void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
    return;
  case ISD::FP_TO_UINT:
    // An i1 result is lowered as a setcc against 0.0 rather than a real
    // conversion (see LowerFPTOUINT).
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
      return;
    }
    // Fall-through. Since we don't care about out of bounds values
    // we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint
    // considers some extra cases which are not necessary here.
  case ISD::FP_TO_SINT: {
    SDValue Result;
    if (expandFP_TO_SINT(N, Result, DAG))
      Results.push_back(Result);
    return;
  }
  case ISD::SDIVREM: {
    // LowerSDIVREM produces a two-result node; push quotient then remainder.
    SDValue Op = SDValue(N, 1);
    SDValue RES = LowerSDIVREM(Op, DAG);
    Results.push_back(RES);
    Results.push_back(RES.getValue(1));
    break;
  }
  case ISD::UDIVREM: {
    // LowerUDIVREM64 appends both replacement values to Results itself.
    SDValue Op = SDValue(N, 0);
    LowerUDIVREM64(Op, DAG, Results);
    break;
  }
  }
}
853
Tom Stellard880a80a2014-06-17 16:53:14 +0000854SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
855 SDValue Vector) const {
856
857 SDLoc DL(Vector);
858 EVT VecVT = Vector.getValueType();
859 EVT EltVT = VecVT.getVectorElementType();
860 SmallVector<SDValue, 8> Args;
861
862 for (unsigned i = 0, e = VecVT.getVectorNumElements();
863 i != e; ++i) {
Mehdi Amini44ede332015-07-09 02:09:04 +0000864 Args.push_back(DAG.getNode(
865 ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
866 DAG.getConstant(i, DL, getVectorIdxTy(DAG.getDataLayout()))));
Tom Stellard880a80a2014-06-17 16:53:14 +0000867 }
868
869 return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
870}
871
872SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
873 SelectionDAG &DAG) const {
874
875 SDLoc DL(Op);
876 SDValue Vector = Op.getOperand(0);
877 SDValue Index = Op.getOperand(1);
878
879 if (isa<ConstantSDNode>(Index) ||
880 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
881 return Op;
882
883 Vector = vectorToVerticalVector(DAG, Vector);
884 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
885 Vector, Index);
886}
887
888SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
889 SelectionDAG &DAG) const {
890 SDLoc DL(Op);
891 SDValue Vector = Op.getOperand(0);
892 SDValue Value = Op.getOperand(1);
893 SDValue Index = Op.getOperand(2);
894
895 if (isa<ConstantSDNode>(Index) ||
896 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
897 return Op;
898
899 Vector = vectorToVerticalVector(DAG, Vector);
900 SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
901 Vector, Value, Index);
902 return vectorToVerticalVector(DAG, Insert);
903}
904
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000905SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
906 // On hw >= R700, COS/SIN input must be between -1. and 1.
907 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
908 EVT VT = Op.getValueType();
909 SDValue Arg = Op.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000910 SDLoc DL(Op);
Sanjay Patela2607012015-09-16 16:31:21 +0000911
912 // TODO: Should this propagate fast-math-flags?
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000913 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
914 DAG.getNode(ISD::FADD, DL, VT,
915 DAG.getNode(ISD::FMUL, DL, VT, Arg,
916 DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
917 DAG.getConstantFP(0.5, DL, MVT::f32)));
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000918 unsigned TrigNode;
919 switch (Op.getOpcode()) {
920 case ISD::FCOS:
921 TrigNode = AMDGPUISD::COS_HW;
922 break;
923 case ISD::FSIN:
924 TrigNode = AMDGPUISD::SIN_HW;
925 break;
926 default:
927 llvm_unreachable("Wrong trig opcode");
928 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000929 SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
930 DAG.getNode(ISD::FADD, DL, VT, FractPart,
931 DAG.getConstantFP(-0.5, DL, MVT::f32)));
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000932 if (Gen >= AMDGPUSubtarget::R700)
933 return TrigVal;
934 // On R600 hw, COS/SIN input must be between -Pi and Pi.
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000935 return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
936 DAG.getConstantFP(3.14159265359, DL, MVT::f32));
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000937}
938
Jan Vesely25f36272014-06-18 12:27:13 +0000939SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
940 SDLoc DL(Op);
941 EVT VT = Op.getValueType();
942
943 SDValue Lo = Op.getOperand(0);
944 SDValue Hi = Op.getOperand(1);
945 SDValue Shift = Op.getOperand(2);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000946 SDValue Zero = DAG.getConstant(0, DL, VT);
947 SDValue One = DAG.getConstant(1, DL, VT);
Jan Vesely25f36272014-06-18 12:27:13 +0000948
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000949 SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
950 SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
Jan Vesely25f36272014-06-18 12:27:13 +0000951 SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
952 SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
953
954 // The dance around Width1 is necessary for 0 special case.
955 // Without it the CompShift might be 32, producing incorrect results in
956 // Overflow. So we do the shift in two steps, the alternative is to
957 // add a conditional to filter the special case.
958
959 SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
960 Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);
961
962 SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
963 HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
964 SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);
965
966 SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
967 SDValue LoBig = Zero;
968
969 Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
970 Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
971
972 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
973}
974
Jan Vesely900ff2e2014-06-18 12:27:15 +0000975SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
976 SDLoc DL(Op);
977 EVT VT = Op.getValueType();
978
979 SDValue Lo = Op.getOperand(0);
980 SDValue Hi = Op.getOperand(1);
981 SDValue Shift = Op.getOperand(2);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000982 SDValue Zero = DAG.getConstant(0, DL, VT);
983 SDValue One = DAG.getConstant(1, DL, VT);
Jan Vesely900ff2e2014-06-18 12:27:15 +0000984
Jan Veselyecf51332014-06-18 12:27:17 +0000985 const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;
986
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000987 SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
988 SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
Jan Vesely900ff2e2014-06-18 12:27:15 +0000989 SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
990 SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
991
992 // The dance around Width1 is necessary for 0 special case.
993 // Without it the CompShift might be 32, producing incorrect results in
994 // Overflow. So we do the shift in two steps, the alternative is to
995 // add a conditional to filter the special case.
996
997 SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
998 Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);
999
Jan Veselyecf51332014-06-18 12:27:17 +00001000 SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
Jan Vesely900ff2e2014-06-18 12:27:15 +00001001 SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
1002 LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);
1003
Jan Veselyecf51332014-06-18 12:27:17 +00001004 SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
1005 SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;
Jan Vesely900ff2e2014-06-18 12:27:15 +00001006
1007 Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
1008 Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
1009
1010 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
1011}
1012
Jan Vesely808fff52015-04-30 17:15:56 +00001013SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
1014 unsigned mainop, unsigned ovf) const {
1015 SDLoc DL(Op);
1016 EVT VT = Op.getValueType();
1017
1018 SDValue Lo = Op.getOperand(0);
1019 SDValue Hi = Op.getOperand(1);
1020
1021 SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
1022 // Extend sign.
1023 OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
1024 DAG.getValueType(MVT::i1));
1025
1026 SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);
1027
1028 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
1029}
1030
Tom Stellard75aadc22012-12-11 21:25:42 +00001031SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001032 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001033 return DAG.getNode(
1034 ISD::SETCC,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001035 DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001036 MVT::i1,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001037 Op, DAG.getConstantFP(0.0f, DL, MVT::f32),
Tom Stellard75aadc22012-12-11 21:25:42 +00001038 DAG.getCondCode(ISD::SETNE)
1039 );
1040}
1041
Tom Stellard75aadc22012-12-11 21:25:42 +00001042SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001043 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001044 unsigned DwordOffset) const {
1045 unsigned ByteOffset = DwordOffset * 4;
1046 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +00001047 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +00001048
1049 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
1050 assert(isInt<16>(ByteOffset));
1051
1052 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001053 DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
Tom Stellard75aadc22012-12-11 21:25:42 +00001054 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
1055 false, false, false, 0);
1056}
1057
Tom Stellard75aadc22012-12-11 21:25:42 +00001058bool R600TargetLowering::isZero(SDValue Op) const {
1059 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
1060 return Cst->isNullValue();
1061 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
1062 return CstFP->isZero();
1063 } else {
1064 return false;
1065 }
1066}
1067
/// Lower SELECT_CC into a form the R600 SET* / CND* instructions can match,
/// massaging the condition code and operand order as needed. When no native
/// form exists, the node is split into two supported SELECT_CC operations.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // f32 selects may fold into a legacy min/max node instead.
  if (VT == MVT::f32) {
    DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
    SDValue MinMax = CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
    if (MinMax)
      return MinMax;
  }

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand. If True/False
  // arrive swapped (hardware-false in the True slot), invert the condition
  // code — or invert-and-swap the compare operands — to restore the
  // canonical order, provided the resulting condition code is legal.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
     ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        // Inverting the condition requires swapping True/False as well.
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types.  This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    // CND* only supports the "equal/ordered" sense; rewrite the not-equal
    // condition codes by inverting them and swapping True/False.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    // Bitcast back to the requested result type (nop when CompareVT == VT).
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, DL, CompareVT);
    HWFalse = DAG.getConstant(0, DL, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations: first materialize the comparison result as a
  // hardware true/false value, then select on that against zero.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
1209
Alp Tokercb402912014-01-24 17:20:08 +00001210/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001211/// convert these pointers to a register index. Each register holds
1212/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
1213/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
1214/// for indirect addressing.
1215SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1216 unsigned StackWidth,
1217 SelectionDAG &DAG) const {
1218 unsigned SRLPad;
1219 switch(StackWidth) {
1220 case 1:
1221 SRLPad = 2;
1222 break;
1223 case 2:
1224 SRLPad = 3;
1225 break;
1226 case 4:
1227 SRLPad = 4;
1228 break;
1229 default: llvm_unreachable("Invalid stack width");
1230 }
1231
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001232 SDLoc DL(Ptr);
1233 return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
1234 DAG.getConstant(SRLPad, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001235}
1236
/// Compute where element \p ElemIdx of a stack value lives, given that
/// \p StackWidth channels of each register are used. On return \p Channel is
/// the sub-register channel for the element and \p PtrIncr is the register
/// index *increment* relative to the previous element — the caller applies
/// these increments cumulatively while iterating elements in order (see the
/// ISD::ADD on Ptr in LowerSTORE's per-element loop), so a value of 1 means
/// "advance one register starting at this element".
void R600TargetLowering::getStackAddress(unsigned StackWidth,
                                         unsigned ElemIdx,
                                         unsigned &Channel,
                                         unsigned &PtrIncr) const {
  switch (StackWidth) {
  default:
  case 1:
    // One element per register: always channel 0, advance a register for
    // every element after the first.
    Channel = 0;
    if (ElemIdx > 0) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 2:
    // Two elements per register: elements 0,1 in the first register,
    // elements 2,3 in the next — so the single advance happens at index 2.
    Channel = ElemIdx % 2;
    if (ElemIdx == 2) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 4:
    // Four elements per register: channel equals the element index, and the
    // whole value fits in one register.
    Channel = ElemIdx;
    PtrIncr = 0;
    break;
  }
}
1265
Tom Stellard75aadc22012-12-11 21:25:42 +00001266SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001267 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001268 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1269 SDValue Chain = Op.getOperand(0);
1270 SDValue Value = Op.getOperand(1);
1271 SDValue Ptr = Op.getOperand(2);
1272
Tom Stellard2ffc3302013-08-26 15:05:44 +00001273 SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
Tom Stellardfbab8272013-08-16 01:12:11 +00001274 if (Result.getNode()) {
1275 return Result;
1276 }
1277
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001278 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
1279 if (StoreNode->isTruncatingStore()) {
1280 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001281 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001282 EVT MemVT = StoreNode->getMemoryVT();
1283 SDValue MaskConstant;
1284 if (MemVT == MVT::i8) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001285 MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001286 } else {
1287 assert(MemVT == MVT::i16);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001288 MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001289 }
1290 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001291 DAG.getConstant(2, DL, MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001292 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001293 DAG.getConstant(0x00000003, DL, VT));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001294 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1295 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001296 DAG.getConstant(3, DL, VT));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001297 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1298 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
1299 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1300 // vector instead.
1301 SDValue Src[4] = {
1302 ShiftedValue,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001303 DAG.getConstant(0, DL, MVT::i32),
1304 DAG.getConstant(0, DL, MVT::i32),
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001305 Mask
1306 };
Craig Topper48d114b2014-04-26 18:35:24 +00001307 SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001308 SDValue Args[3] = { Chain, Input, DWordAddr };
1309 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
Craig Topper206fcd42014-04-26 19:29:41 +00001310 Op->getVTList(), Args, MemVT,
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001311 StoreNode->getMemOperand());
1312 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
1313 Value.getValueType().bitsGE(MVT::i32)) {
1314 // Convert pointer from byte address to dword address.
1315 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1316 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001317 Ptr, DAG.getConstant(2, DL, MVT::i32)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001318
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001319 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
Matt Arsenaulteaa3a7e2013-12-10 21:37:42 +00001320 llvm_unreachable("Truncated and indexed stores not supported yet");
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001321 } else {
1322 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1323 }
1324 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001325 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001326 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001327
1328 EVT ValueVT = Value.getValueType();
1329
1330 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1331 return SDValue();
1332 }
1333
Tom Stellarde9373602014-01-22 19:24:14 +00001334 SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
1335 if (Ret.getNode()) {
1336 return Ret;
1337 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001338 // Lowering for indirect addressing
1339
1340 const MachineFunction &MF = DAG.getMachineFunction();
Eric Christopher7792e322015-01-30 23:24:40 +00001341 const AMDGPUFrameLowering *TFL =
1342 static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001343 unsigned StackWidth = TFL->getStackWidth(MF);
1344
1345 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1346
1347 if (ValueVT.isVector()) {
1348 unsigned NumElemVT = ValueVT.getVectorNumElements();
1349 EVT ElemVT = ValueVT.getVectorElementType();
Craig Topper48d114b2014-04-26 18:35:24 +00001350 SmallVector<SDValue, 4> Stores(NumElemVT);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001351
1352 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1353 "vector width in load");
1354
1355 for (unsigned i = 0; i < NumElemVT; ++i) {
1356 unsigned Channel, PtrIncr;
1357 getStackAddress(StackWidth, i, Channel, PtrIncr);
1358 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001359 DAG.getConstant(PtrIncr, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001360 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001361 Value, DAG.getConstant(i, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001362
1363 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1364 Chain, Elem, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001365 DAG.getTargetConstant(Channel, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001366 }
Craig Topper48d114b2014-04-26 18:35:24 +00001367 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001368 } else {
1369 if (ValueVT == MVT::i8) {
1370 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1371 }
1372 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001373 DAG.getTargetConstant(0, DL, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001374 }
1375
1376 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001377}
1378
Tom Stellard365366f2013-01-23 02:09:06 +00001379// return (512 + (kc_bank << 12)
1380static int
1381ConstantAddressBlock(unsigned AddressSpace) {
1382 switch (AddressSpace) {
1383 case AMDGPUAS::CONSTANT_BUFFER_0:
1384 return 512;
1385 case AMDGPUAS::CONSTANT_BUFFER_1:
1386 return 512 + 4096;
1387 case AMDGPUAS::CONSTANT_BUFFER_2:
1388 return 512 + 4096 * 2;
1389 case AMDGPUAS::CONSTANT_BUFFER_3:
1390 return 512 + 4096 * 3;
1391 case AMDGPUAS::CONSTANT_BUFFER_4:
1392 return 512 + 4096 * 4;
1393 case AMDGPUAS::CONSTANT_BUFFER_5:
1394 return 512 + 4096 * 5;
1395 case AMDGPUAS::CONSTANT_BUFFER_6:
1396 return 512 + 4096 * 6;
1397 case AMDGPUAS::CONSTANT_BUFFER_7:
1398 return 512 + 4096 * 7;
1399 case AMDGPUAS::CONSTANT_BUFFER_8:
1400 return 512 + 4096 * 8;
1401 case AMDGPUAS::CONSTANT_BUFFER_9:
1402 return 512 + 4096 * 9;
1403 case AMDGPUAS::CONSTANT_BUFFER_10:
1404 return 512 + 4096 * 10;
1405 case AMDGPUAS::CONSTANT_BUFFER_11:
1406 return 512 + 4096 * 11;
1407 case AMDGPUAS::CONSTANT_BUFFER_12:
1408 return 512 + 4096 * 12;
1409 case AMDGPUAS::CONSTANT_BUFFER_13:
1410 return 512 + 4096 * 13;
1411 case AMDGPUAS::CONSTANT_BUFFER_14:
1412 return 512 + 4096 * 14;
1413 case AMDGPUAS::CONSTANT_BUFFER_15:
1414 return 512 + 4096 * 15;
1415 default:
1416 return -1;
1417 }
1418}
1419
/// \brief Custom lowering for ISD::LOAD on R600.
///
/// Handles, in order:
///  1. Loads common to all AMDGPU targets (delegated to the superclass).
///  2. Constant-address-space loads of global variables, lowered to a
///     REGISTER_LOAD of a dword index (pointer >> 2).
///  3. Vector loads from local memory, which are scalarized.
///  4. Loads from the CONSTANT_BUFFER_* address spaces, lowered to
///     CONST_ADDRESS nodes (folding the offset when the pointer is constant).
///  5. SEXT loads, expanded to EXTLOAD + SIGN_EXTEND_INREG.
///  6. Private-address-space loads, lowered to indirect REGISTER_LOADs.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  // Give the generic AMDGPU lowering the first chance at this load.
  if (SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG))
    return Ret;

  // Lower loads constant address space global variable loads
  if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
      isa<GlobalVariable>(GetUnderlyingObject(
          LoadNode->getMemOperand()->getValue(), DAG.getDataLayout()))) {

    // Convert the byte pointer to a dword register index (divide by 4).
    SDValue Ptr = DAG.getZExtOrTrunc(
        LoadNode->getBasePtr(), DL,
        getPointerTy(DAG.getDataLayout(), AMDGPUAS::PRIVATE_ADDRESS));
    Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
                      DAG.getConstant(2, DL, MVT::i32));
    return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op->getVTList(),
                       LoadNode->getChain(), Ptr,
                       DAG.getTargetConstant(0, DL, MVT::i32),
                       Op.getOperand(2));
  }

  // Vector loads from local memory are split into per-element loads.
  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      ScalarizeVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  // Only non-extending and zero-extending loads can use CONST_ADDRESS here;
  // sign-extending constant-buffer loads fall through to the SEXT expansion.
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
        isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      // Build a vector from as many of the four slots as the result needs.
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT,
                           makeArrayRef(Slots, NumElements));
    } else {
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
                      DAG.getConstant(4, DL, MVT::i32)),
                      DAG.getConstant(LoadNode->getAddressSpace() -
                                      AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
          );
    }

    // Scalar loads only need element 0 of the v4 constant fetch.
    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, DL, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    // Expand to an any-extending load followed by an in-register sign extend.
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                     LoadNode->getPointerInfo(), MemVT,
                                     LoadNode->isVolatile(),
                                     LoadNode->isNonTemporal(),
                                     LoadNode->isInvariant(),
                                     LoadNode->getAlignment());
    SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
                              DAG.getValueType(MemVT));

    SDValue MergedValues[2] = { Res, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Everything else (except private-address loads) is left to the legalizer.
  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL =
      static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    // Emit one REGISTER_LOAD per element, advancing the register index as
    // dictated by the stack layout.
    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, DL, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, DL, MVT::i32),
                             Op.getOperand(2));
    }
    // Pad unused lanes with undef so the result is always a 4-element vector.
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, DL, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2] = {
    LoweredLoad,
    Chain
  };

  return DAG.getMergeValues(Ops, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001577
Matt Arsenault1d555c42014-06-23 18:00:55 +00001578SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1579 SDValue Chain = Op.getOperand(0);
1580 SDValue Cond = Op.getOperand(1);
1581 SDValue Jump = Op.getOperand(2);
1582
1583 return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1584 Chain, Jump, Cond);
1585}
1586
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
///
/// Non-compute shaders receive their inputs as live-in 128-bit registers;
/// compute kernels read their arguments from constant buffer 0, starting at
/// byte offset 36 (the first 36 bytes hold dispatch information — see the
/// comment in the loop body).
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      SDLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();

  SmallVector<ISD::InputArg, 8> LocalIns;

  // Recover the pre-legalization argument list so locations can be assigned
  // against the original IR types.
  getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);

  AnalyzeFormalArguments(CCInfo, LocalIns);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    const ISD::InputArg &In = Ins[i];
    EVT VT = In.VT;
    EVT MemVT = VA.getLocVT();
    if (!VT.isVector() && MemVT.isVector()) {
      // Get load source type if scalarized.
      MemVT = MemVT.getVectorElementType();
    }

    // Non-compute shaders: arguments arrive in live-in vector registers.
    if (MFI->getShaderType() != ShaderType::COMPUTE) {
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    // Compute kernels: arguments are loaded from constant buffer 0.
    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
    // being invalid. Somehow this seems to work with i64 arguments, but breaks
    // for <1 x i64>.

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.
    ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
    if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
      // FIXME: This should really check the extload type, but the handling of
      // extload vector parameters seems to be broken.

      // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
      Ext = ISD::SEXTLOAD;
    }

    // Compute the offset from the value.
    // XXX - I think PartOffset should give you this, but it seems to give the
    // size of the register which isn't useful.

    unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
    unsigned PartOffset = VA.getLocMemOffset();
    // Skip the 36-byte dispatch-info header when addressing the buffer.
    unsigned Offset = 36 + VA.getLocMemOffset();

    // PtrInfo offset is relative to the start of the original argument.
    MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
    SDValue Arg = DAG.getLoad(ISD::UNINDEXED, Ext, VT, DL, Chain,
                              DAG.getConstant(Offset, DL, MVT::i32),
                              DAG.getUNDEF(MVT::i32),
                              PtrInfo,
                              MemVT, false, true, true, 4);

    // 4 is the preferred alignment for the CONSTANT memory space.
    InVals.push_back(Arg);
    // Record where the next argument (or the post-ABI area) begins.
    MFI->ABIArgOffset = Offset + MemVT.getStoreSize();
  }
  return Chain;
}
1666
Mehdi Amini44ede332015-07-09 02:09:04 +00001667EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1668 EVT VT) const {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001669 if (!VT.isVector())
1670 return MVT::i32;
Tom Stellard75aadc22012-12-11 21:25:42 +00001671 return VT.changeVectorElementTypeToInteger();
1672}
1673
/// \brief Canonicalize a BUILD_VECTOR so elements can be encoded as swizzle
/// selects instead of register reads.
///
/// Fills \p RemapSwizzle with old-lane -> swizzle-select entries:
///   7 (SEL_MASK_WRITE) for undef lanes, 4 (SEL_0) for +0.0 constants,
///   5 (SEL_1) for 1.0 constants, and j for a lane that duplicates earlier
///   lane j. Each folded lane is replaced by undef in the returned
///   BUILD_VECTOR.
static SDValue CompactSwizzlableVector(
  SelectionDAG &DAG, SDValue VectorEntry,
  DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      // We mask write here to teach later passes that the ith element of this
      // vector is undef. Thus we can use it to reduce 128 bits reg usage,
      // break false dependencies and additionally make assembly easier to read.
      RemapSwizzle[i] = 7; // SEL_MASK_WRITE
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    // Undef lanes (original or just-folded) take no part in deduplication.
    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      continue;
    // If this lane repeats an earlier lane, point its swizzle at that lane
    // and drop the redundant value.
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec);
}
1716
Benjamin Kramer193960c2013-06-11 13:32:25 +00001717static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1718 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001719 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1720 assert(RemapSwizzle.empty());
1721 SDValue NewBldVec[4] = {
1722 VectorEntry.getOperand(0),
1723 VectorEntry.getOperand(1),
1724 VectorEntry.getOperand(2),
1725 VectorEntry.getOperand(3)
1726 };
1727 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001728 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001729 RemapSwizzle[i] = i;
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001730 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1731 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1732 ->getZExtValue();
1733 if (i == Idx)
1734 isUnmovable[Idx] = true;
1735 }
1736 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001737
1738 for (unsigned i = 0; i < 4; i++) {
1739 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1740 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1741 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001742 if (isUnmovable[Idx])
1743 continue;
1744 // Swap i and Idx
1745 std::swap(NewBldVec[Idx], NewBldVec[i]);
1746 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1747 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001748 }
1749 }
1750
1751 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001752 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001753}
1754
1755
1756SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001757 SDValue Swz[4], SelectionDAG &DAG,
1758 SDLoc DL) const {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001759 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1760 // Old -> New swizzle values
1761 DenseMap<unsigned, unsigned> SwizzleRemap;
1762
1763 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1764 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001765 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001766 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001767 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001768 }
1769
1770 SwizzleRemap.clear();
1771 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1772 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001773 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001774 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001775 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001776 }
1777
1778 return BuildVector;
1779}
1780
1781
Tom Stellard75aadc22012-12-11 21:25:42 +00001782//===----------------------------------------------------------------------===//
1783// Custom DAG Optimizations
1784//===----------------------------------------------------------------------===//
1785
/// \brief Target-specific DAG combines for R600.
///
/// Patterns handled here: fp_round(uint_to_fp), fp_to_sint(fneg(select_cc)),
/// insert_vector_elt into a build_vector, extract_vector_elt from a
/// build_vector (possibly through a bitcast), nested select_cc folding, and
/// swizzle optimization of EXPORT / TEXTURE_FETCH operands. Everything else
/// is delegated to the generic AMDGPU combines.
SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
  case ISD::FP_ROUND: {
      SDValue Arg = N->getOperand(0);
      if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
        return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
                           Arg.getOperand(0));
      }
      break;
    }

  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
  // (i32 select_cc f32, f32, -1, 0 cc)
  //
  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
  // this to one of the SET*_DX10 instructions.
  case ISD::FP_TO_SINT: {
    SDValue FNeg = N->getOperand(0);
    if (FNeg.getOpcode() != ISD::FNEG) {
      return SDValue();
    }
    SDValue SelectCC = FNeg.getOperand(0);
    // Only fire when the select produces exactly HW true (1.0) / false (0.0).
    if (SelectCC.getOpcode() != ISD::SELECT_CC ||
        SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
        SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
        !isHWTrueValue(SelectCC.getOperand(2)) ||
        !isHWFalseValue(SelectCC.getOperand(3))) {
      return SDValue();
    }

    SDLoc dl(N);
    return DAG.getNode(ISD::SELECT_CC, dl, N->getValueType(0),
                           SelectCC.getOperand(0), // LHS
                           SelectCC.getOperand(1), // RHS
                           DAG.getConstant(-1, dl, MVT::i32), // True
                           DAG.getConstant(0, dl, MVT::i32), // False
                           SelectCC.getOperand(4)); // CC

    break; // (unreachable; the return above always exits this case)
  }

  // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
  // => build_vector elt0, ... , NewEltIdx, ... , eltN
  case ISD::INSERT_VECTOR_ELT: {
    SDValue InVec = N->getOperand(0);
    SDValue InVal = N->getOperand(1);
    SDValue EltNo = N->getOperand(2);
    SDLoc dl(N);

    // If the inserted element is an UNDEF, just use the input vector.
    if (InVal.getOpcode() == ISD::UNDEF)
      return InVec;

    EVT VT = InVec.getValueType();

    // If we can't generate a legal BUILD_VECTOR, exit
    if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
      return SDValue();

    // Check that we know which element is being inserted
    if (!isa<ConstantSDNode>(EltNo))
      return SDValue();
    unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

    // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
    // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
    // vector elements.
    SmallVector<SDValue, 8> Ops;
    if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
      Ops.append(InVec.getNode()->op_begin(),
                 InVec.getNode()->op_end());
    } else if (InVec.getOpcode() == ISD::UNDEF) {
      unsigned NElts = VT.getVectorNumElements();
      Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
    } else {
      return SDValue();
    }

    // Insert the element
    if (Elt < Ops.size()) {
      // All the operands of BUILD_VECTOR must have the same type;
      // we enforce that here.
      EVT OpVT = Ops[0].getValueType();
      if (InVal.getValueType() != OpVT)
        InVal = OpVT.bitsGT(InVal.getValueType()) ?
          DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
          DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
      Ops[Elt] = InVal;
    }

    // Return the new vector
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
  }

  // Extract_vec (Build_vector) generated by custom lowering
  // also needs to be customly combined
  case ISD::EXTRACT_VECTOR_ELT: {
    SDValue Arg = N->getOperand(0);
    // Extracting a known element from a build_vector: forward the operand.
    if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return Arg->getOperand(Element);
      }
    }
    // Same, looking through a bitcast of a build_vector; the bitcast is
    // re-applied to the extracted element.
    if (Arg.getOpcode() == ISD::BITCAST &&
        Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
            Arg->getOperand(0).getOperand(Element));
      }
    }
    break;
  }

  case ISD::SELECT_CC: {
    // Try common optimizations
    SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
    if (Ret.getNode())
      return Ret;

    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
    //      selectcc x, y, a, b, inv(cc)
    //
    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
    //      selectcc x, y, a, b, cc
    SDValue LHS = N->getOperand(0);
    if (LHS.getOpcode() != ISD::SELECT_CC) {
      return SDValue();
    }

    SDValue RHS = N->getOperand(1);
    SDValue True = N->getOperand(2);
    SDValue False = N->getOperand(3);
    ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();

    // The outer select must compare the inner select's result against its
    // own false value, and reuse the same true/false operands.
    if (LHS.getOperand(2).getNode() != True.getNode() ||
        LHS.getOperand(3).getNode() != False.getNode() ||
        RHS.getNode() != False.getNode()) {
      return SDValue();
    }

    switch (NCC) {
    default: return SDValue();
    case ISD::SETNE: return LHS;
    case ISD::SETEQ: {
      ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
      LHSCC = ISD::getSetCCInverse(LHSCC,
                                  LHS.getOperand(0).getValueType().isInteger());
      // Only rewrite with the inverted condition if it is (or will be) legal.
      if (DCI.isBeforeLegalizeOps() ||
          isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
        return DAG.getSelectCC(SDLoc(N),
                               LHS.getOperand(0),
                               LHS.getOperand(1),
                               LHS.getOperand(2),
                               LHS.getOperand(3),
                               LHSCC);
      break;
    }
    }
    return SDValue();
  }

  case AMDGPUISD::EXPORT: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    SDValue NewArgs[8] = {
      N->getOperand(0), // Chain
      SDValue(),        // Filled in below with the optimized value vector.
      N->getOperand(2), // ArrayBase
      N->getOperand(3), // Type
      N->getOperand(4), // SWZ_X
      N->getOperand(5), // SWZ_Y
      N->getOperand(6), // SWZ_Z
      N->getOperand(7) // SWZ_W
    };
    SDLoc DL(N);
    // Canonicalize the exported vector and rewrite SWZ_X..SWZ_W in place.
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
    return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
  }
  case AMDGPUISD::TEXTURE_FETCH: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    // Operands are copied verbatim; only the source vector (operand 1) and
    // the four swizzle selects starting at index 2 are rewritten.
    SDValue NewArgs[19] = {
      N->getOperand(0),
      N->getOperand(1),
      N->getOperand(2),
      N->getOperand(3),
      N->getOperand(4),
      N->getOperand(5),
      N->getOperand(6),
      N->getOperand(7),
      N->getOperand(8),
      N->getOperand(9),
      N->getOperand(10),
      N->getOperand(11),
      N->getOperand(12),
      N->getOperand(13),
      N->getOperand(14),
      N->getOperand(15),
      N->getOperand(16),
      N->getOperand(17),
      N->getOperand(18),
    };
    SDLoc DL(N);
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
    return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
  }
  }

  return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002007
2008static bool
2009FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002010 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Eric Christopherfc6de422014-08-05 02:39:49 +00002011 const R600InstrInfo *TII =
2012 static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002013 if (!Src.isMachineOpcode())
2014 return false;
2015 switch (Src.getMachineOpcode()) {
2016 case AMDGPU::FNEG_R600:
2017 if (!Neg.getNode())
2018 return false;
2019 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002020 Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002021 return true;
2022 case AMDGPU::FABS_R600:
2023 if (!Abs.getNode())
2024 return false;
2025 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002026 Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002027 return true;
2028 case AMDGPU::CONST_COPY: {
2029 unsigned Opcode = ParentNode->getMachineOpcode();
2030 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2031
2032 if (!Sel.getNode())
2033 return false;
2034
2035 SDValue CstOffset = Src.getOperand(0);
2036 if (ParentNode->getValueType(0).isVector())
2037 return false;
2038
2039 // Gather constants values
2040 int SrcIndices[] = {
2041 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2042 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2043 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
2044 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2045 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2046 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2047 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2048 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2049 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2050 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2051 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
2052 };
2053 std::vector<unsigned> Consts;
Matt Arsenault4d64f962014-05-12 19:23:21 +00002054 for (int OtherSrcIdx : SrcIndices) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002055 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2056 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2057 continue;
2058 if (HasDst) {
2059 OtherSrcIdx--;
2060 OtherSelIdx--;
2061 }
2062 if (RegisterSDNode *Reg =
2063 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2064 if (Reg->getReg() == AMDGPU::ALU_CONST) {
Matt Arsenaultb3ee3882014-05-12 19:26:38 +00002065 ConstantSDNode *Cst
2066 = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002067 Consts.push_back(Cst->getZExtValue());
2068 }
2069 }
2070 }
2071
Matt Arsenault37c12d72014-05-12 20:42:57 +00002072 ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002073 Consts.push_back(Cst->getZExtValue());
2074 if (!TII->fitsConstReadLimitations(Consts)) {
2075 return false;
2076 }
2077
2078 Sel = CstOffset;
2079 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
2080 return true;
2081 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002082 case AMDGPU::MOV_IMM_I32:
2083 case AMDGPU::MOV_IMM_F32: {
2084 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
2085 uint64_t ImmValue = 0;
2086
2087
2088 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
2089 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
2090 float FloatValue = FPC->getValueAPF().convertToFloat();
2091 if (FloatValue == 0.0) {
2092 ImmReg = AMDGPU::ZERO;
2093 } else if (FloatValue == 0.5) {
2094 ImmReg = AMDGPU::HALF;
2095 } else if (FloatValue == 1.0) {
2096 ImmReg = AMDGPU::ONE;
2097 } else {
2098 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2099 }
2100 } else {
2101 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
2102 uint64_t Value = C->getZExtValue();
2103 if (Value == 0) {
2104 ImmReg = AMDGPU::ZERO;
2105 } else if (Value == 1) {
2106 ImmReg = AMDGPU::ONE_INT;
2107 } else {
2108 ImmValue = Value;
2109 }
2110 }
2111
2112 // Check that we aren't already using an immediate.
2113 // XXX: It's possible for an instruction to have more than one
2114 // immediate operand, but this is not supported yet.
2115 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
2116 if (!Imm.getNode())
2117 return false;
2118 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
2119 assert(C);
2120 if (C->getZExtValue())
2121 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002122 Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002123 }
2124 Src = DAG.getRegister(ImmReg, MVT::i32);
2125 return true;
2126 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002127 default:
2128 return false;
2129 }
2130}
2131
2132
/// \brief Fold the instructions after selecting them
///
/// Walks the already-selected machine node \p Node and tries to fold its
/// source operands via FoldOperand(): absorbing FNEG/FABS modifier nodes
/// into neg/abs flag operands, constant copies into ALU_CONST reads, and
/// immediate moves into inline constants or the literal slot.  Also folds
/// CLAMP_R600 into the clamp bit of its producing instruction when that
/// instruction supports output modifiers.
///
/// \returns a replacement machine node when any fold succeeded, otherwise
/// \p Node unchanged.
SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
                                            SelectionDAG &DAG) const {
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
  if (!Node->isMachineOpcode())
    return Node;
  unsigned Opcode = Node->getMachineOpcode();
  // Placeholder passed to FoldOperand() for flag slots this opcode lacks;
  // a node-less SDValue makes the corresponding fold bail out.
  SDValue FakeOp;

  // Mutable copy of the operand list; FoldOperand() rewrites entries in
  // place and we rebuild the node from it after a successful fold.
  std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());

  if (Opcode == AMDGPU::DOT_4) {
    // DOT_4 has one src0/src1 pair per channel (X,Y,Z,W), each with its own
    // neg/abs flags.  The tables below are indexed in lock-step.
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
    };
    for (unsigned i = 0; i < 8; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      // getOperandIdx() returns MachineInstr operand positions, which count
      // the dst; SDNode operands do not, hence the "- 1" adjustments below
      // (and SelIdx-- when a dst is present).
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue &Abs = Ops[AbsIdx[i] - 1];
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      if (HasDst)
        SelIdx--;
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      // DOT_4 has no literal slot, so pass FakeOp for Imm.
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::REG_SEQUENCE) {
    // REG_SEQUENCE operands alternate (value, subreg-index) starting at 1;
    // only the value slots can be folded, and they carry no modifier flags.
    for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
      SDValue &Src = Ops[i];
      if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::CLAMP_R600) {
    // Fold the clamp pseudo into the defining instruction's clamp bit when
    // that instruction supports output modifiers.
    SDValue Src = Node->getOperand(0);
    if (!Src.isMachineOpcode() ||
        !TII->hasInstrModifiers(Src.getMachineOpcode()))
      return Node;
    int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
        AMDGPU::OpName::clamp);
    if (ClampIdx < 0)
      return Node;
    SDLoc DL(Node);
    // Note: this local Ops (operands of the *producer*) intentionally
    // shadows the outer Ops built from Node.
    std::vector<SDValue> Ops(Src->op_begin(), Src->op_end());
    Ops[ClampIdx - 1] = DAG.getTargetConstant(1, DL, MVT::i32);
    return DAG.getMachineNode(Src.getMachineOpcode(), DL,
                              Node->getVTList(), Ops);
  } else {
    // Generic ALU instruction: up to three sources, each with neg and
    // (except src2) abs flags, plus a single shared literal slot.
    if (!TII->hasInstrModifiers(Opcode))
      return Node;
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
      -1  // src2 has no abs modifier on this hardware
    };
    for (unsigned i = 0; i < 3; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue FakeAbs;
      SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
      if (HasDst) {
        // Adjust MachineInstr indices to SDNode indices (no dst operand).
        SelIdx--;
        ImmIdx--;
      }
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      // NOTE(review): ImmIdx is indexed unchecked — presumably every opcode
      // passing hasInstrModifiers() has a literal operand; confirm.
      SDValue &Imm = Ops[ImmIdx];
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  }

  return Node;
}