//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "AMDGPUFrameLowering.h"
#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Function.h"

using namespace llvm;

R600TargetLowering::R600TargetLowering(TargetMachine &TM,
                                       const AMDGPUSubtarget &STI)
    : AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) {
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
  addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);

  computeRegisterProperties(STI.getRegisterInfo());

  // Set condition code actions
  setCondCodeAction(ISD::SETO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::f32, Expand);

  setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::i32, Expand);

  setOperationAction(ISD::FCOS, MVT::f32, Custom);
  setOperationAction(ISD::FSIN, MVT::f32, Custom);

  setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
  setOperationAction(ISD::SETCC, MVT::v2i32, Expand);

  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
  setOperationAction(ISD::SELECT, MVT::v4i32, Expand);

  // ADD, SUB overflow.
  // TODO: turn these into Legal?
  if (Subtarget->hasCARRY())
    setOperationAction(ISD::UADDO, MVT::i32, Custom);

  if (Subtarget->hasBORROW())
    setOperationAction(ISD::USUBO, MVT::i32, Custom);

  // Expand sign extension of vectors
  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);

  // Legalize loads and stores to the private address space.
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);

  // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
  // spaces, so it is custom lowered to handle those where it isn't.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
  }

  setOperationAction(ISD::STORE, MVT::i8, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);
  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
  setTruncStoreAction(MVT::i32, MVT::i16, Custom);

  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);

  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);

  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::FP_TO_SINT);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::SELECT_CC);
  setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);

  // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
  // to be Legal/Custom in order to avoid library calls.
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);

  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
  for (MVT VT : ScalarIntVTs) {
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::SUBE, VT, Expand);
  }

  setSchedulingPreference(Sched::Source);
}
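
// A quick reminder of the action semantics used in the constructor above:
// Legal means the node is matched directly by the instruction patterns,
// Expand lets the generic legalizer rewrite it in terms of other operations,
// and Custom routes the node to this file's LowerOperation /
// ReplaceNodeResults hooks below.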

static inline bool isEOP(MachineBasicBlock::iterator I) {
  return std::next(I)->getOpcode() == AMDGPU::RETURN;
}

MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(Subtarget->getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // Replace LDS_*_RET instructions that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI->getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
      // LDS_1A2D support and remove this special case.
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()) ||
          MI->getOpcode() == AMDGPU::LDS_CMPST_RET)
        return BB;

      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
                       MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(isEOP(I)); // Set End of program bit
    break;
  }
  case AMDGPU::RAT_STORE_TYPED_eg: {
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addOperand(MI->getOperand(2))
        .addImm(isEOP(I)); // Set End of program bit
    break;
  }

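  // TXD and TXD_SHADOW (texture sampling with explicit derivatives) are
  // expanded below into three instructions: TEX_SET_GRADIENTS_H and
  // TEX_SET_GRADIENTS_V load the horizontal/vertical gradients, then
  // TEX_SAMPLE_G / TEX_SAMPLE_C_G performs the actual fetch. The coordinate
  // swizzle (SrcX..SrcW) and the coordinate-type bits (CTX..CTW) are adjusted
  // per texture type (Rect, Shadow*, *Array) before the instructions are
  // built.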
  case AMDGPU::TXD: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
        .addOperand(MI->getOperand(3))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
        .addOperand(MI->getOperand(2))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW)
        .addReg(T0, RegState::Implicit)
        .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
        .addOperand(MI->getOperand(3))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
        .addOperand(MI->getOperand(2))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW)
        .addReg(T0, RegState::Implicit)
        .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
        .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .addOperand(MI->getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO_INT)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .addOperand(MI->getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if it's not the last one of its type.
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = isEOP(I);
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz) ? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addOperand(MI->getOperand(2))
        .addOperand(MI->getOperand(3))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5))
        .addOperand(MI->getOperand(6))
        .addImm(CfInst)
        .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  MI->eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
  case ISD::SRA_PARTS:
  case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
  case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
  case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
  case ISD::FCOS:
  case ISD::FSIN: return LowerTrig(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: {
    SDValue Result = LowerLOAD(Op, DAG);
    assert((!Result.getNode() ||
            Result.getNode()->getNumValues() == 2) &&
           "Load should return a value and a chain");
    return Result;
  }

  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::R600_store_swizzle: {
      SDLoc DL(Op);
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, DL, MVT::i32), // SWZ_X
        DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
        DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
        DAG.getConstant(3, DL, MVT::i32)  // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::EXPORT, DL, Op.getValueType(), Args);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch (IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_interp_xy:
    case AMDGPUIntrinsic::R600_interp_zw: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      MachineSDNode *interp;
      SDValue RegisterINode = Op.getOperand(2);
      SDValue RegisterJNode = Op.getOperand(3);

      if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
            RegisterJNode, RegisterINode);
      return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
                         SDValue(interp, 0), SDValue(interp, 1));
    }
    case AMDGPUIntrinsic::r600_tex:
    case AMDGPUIntrinsic::r600_texc:
    case AMDGPUIntrinsic::r600_txl:
    case AMDGPUIntrinsic::r600_txlc:
    case AMDGPUIntrinsic::r600_txb:
    case AMDGPUIntrinsic::r600_txbc:
    case AMDGPUIntrinsic::r600_txf:
    case AMDGPUIntrinsic::r600_txq:
    case AMDGPUIntrinsic::r600_ddx:
    case AMDGPUIntrinsic::r600_ddy:
    case AMDGPUIntrinsic::R600_ldptr: {
      unsigned TextureOp;
      switch (IntrinsicID) {
      case AMDGPUIntrinsic::r600_tex:
        TextureOp = 0;
        break;
      case AMDGPUIntrinsic::r600_texc:
        TextureOp = 1;
        break;
      case AMDGPUIntrinsic::r600_txl:
        TextureOp = 2;
        break;
      case AMDGPUIntrinsic::r600_txlc:
        TextureOp = 3;
        break;
      case AMDGPUIntrinsic::r600_txb:
        TextureOp = 4;
        break;
      case AMDGPUIntrinsic::r600_txbc:
        TextureOp = 5;
        break;
      case AMDGPUIntrinsic::r600_txf:
        TextureOp = 6;
        break;
      case AMDGPUIntrinsic::r600_txq:
        TextureOp = 7;
        break;
      case AMDGPUIntrinsic::r600_ddx:
        TextureOp = 8;
        break;
      case AMDGPUIntrinsic::r600_ddy:
        TextureOp = 9;
        break;
      case AMDGPUIntrinsic::R600_ldptr:
        TextureOp = 10;
        break;
      default:
        llvm_unreachable("Unknown Texture Operation");
      }

      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, DL, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(1, DL, MVT::i32),
        DAG.getConstant(2, DL, MVT::i32),
        DAG.getConstant(3, DL, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(1, DL, MVT::i32),
        DAG.getConstant(2, DL, MVT::i32),
        DAG.getConstant(3, DL, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
    }
    case AMDGPUIntrinsic::AMDGPU_dp4: {
      SDValue Args[8] = {
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
                    DAG.getConstant(0, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
                    DAG.getConstant(0, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
                    DAG.getConstant(1, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
                    DAG.getConstant(1, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
                    DAG.getConstant(2, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
                    DAG.getConstant(2, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
                    DAG.getConstant(3, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
                    DAG.getConstant(3, DL, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
    }

    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    case Intrinsic::r600_read_workdim:
    case AMDGPUIntrinsic::AMDGPU_read_workdim: { // Legacy name.
      uint32_t ByteOffset = getImplicitParameterOffset(MFI, GRID_DIM);
      return LowerImplicitParameter(DAG, VT, DL, ByteOffset / 4);
    }

    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);

    // FIXME: Should be renamed to r600 prefix
    case AMDGPUIntrinsic::AMDGPU_rsq_clamped:
      return DAG.getNode(AMDGPUISD::RSQ_CLAMPED, DL, VT, Op.getOperand(1));

    case Intrinsic::r600_rsq:
    case AMDGPUIntrinsic::AMDGPU_rsq: // Legacy name
      // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
      return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
    return;
  case ISD::FP_TO_UINT:
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
      return;
    }
    // Fall-through. Since we don't care about out of bounds values
    // we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint
    // considers some extra cases which are not necessary here.
  case ISD::FP_TO_SINT: {
    SDValue Result;
    if (expandFP_TO_SINT(N, Result, DAG))
      Results.push_back(Result);
    return;
  }
  case ISD::SDIVREM: {
    SDValue Op = SDValue(N, 1);
    SDValue RES = LowerSDIVREM(Op, DAG);
    Results.push_back(RES);
    Results.push_back(RES.getValue(1));
    break;
  }
  case ISD::UDIVREM: {
    SDValue Op = SDValue(N, 0);
    LowerUDIVREM64(Op, DAG, Results);
    break;
  }
  }
}

SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
                                                   SDValue Vector) const {
  SDLoc DL(Vector);
  EVT VecVT = Vector.getValueType();
  EVT EltVT = VecVT.getVectorElementType();
  SmallVector<SDValue, 8> Args;

  for (unsigned i = 0, e = VecVT.getVectorNumElements();
       i != e; ++i) {
    Args.push_back(DAG.getNode(
        ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
        DAG.getConstant(i, DL, getVectorIdxTy(DAG.getDataLayout()))));
  }

  return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
}
873SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
874 SelectionDAG &DAG) const {
875
876 SDLoc DL(Op);
877 SDValue Vector = Op.getOperand(0);
878 SDValue Index = Op.getOperand(1);
879
880 if (isa<ConstantSDNode>(Index) ||
881 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
882 return Op;
883
884 Vector = vectorToVerticalVector(DAG, Vector);
885 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
886 Vector, Index);
887}
888
889SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
890 SelectionDAG &DAG) const {
891 SDLoc DL(Op);
892 SDValue Vector = Op.getOperand(0);
893 SDValue Value = Op.getOperand(1);
894 SDValue Index = Op.getOperand(2);
895
896 if (isa<ConstantSDNode>(Index) ||
897 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
898 return Op;
899
900 Vector = vectorToVerticalVector(DAG, Vector);
901 SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
902 Vector, Value, Index);
903 return vectorToVerticalVector(DAG, Insert);
904}
905
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000906SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
907 // On hw >= R700, COS/SIN input must be between -1. and 1.
908 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
909 EVT VT = Op.getValueType();
910 SDValue Arg = Op.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000911 SDLoc DL(Op);
Sanjay Patela2607012015-09-16 16:31:21 +0000912
913 // TODO: Should this propagate fast-math-flags?
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000914 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
915 DAG.getNode(ISD::FADD, DL, VT,
916 DAG.getNode(ISD::FMUL, DL, VT, Arg,
917 DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
918 DAG.getConstantFP(0.5, DL, MVT::f32)));
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000919 unsigned TrigNode;
920 switch (Op.getOpcode()) {
921 case ISD::FCOS:
922 TrigNode = AMDGPUISD::COS_HW;
923 break;
924 case ISD::FSIN:
925 TrigNode = AMDGPUISD::SIN_HW;
926 break;
927 default:
928 llvm_unreachable("Wrong trig opcode");
929 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000930 SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
931 DAG.getNode(ISD::FADD, DL, VT, FractPart,
932 DAG.getConstantFP(-0.5, DL, MVT::f32)));
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000933 if (Gen >= AMDGPUSubtarget::R700)
934 return TrigVal;
935 // On R600 hw, COS/SIN input must be between -Pi and Pi.
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000936 return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
937 DAG.getConstantFP(3.14159265359, DL, MVT::f32));
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000938}
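
// Informal worked example for the range reduction above: 0.15915494309 is
// ~1/(2*pi), so for x = 5*pi/2 the FRACT operand is 5/4 + 0.5 = 1.75, FRACT
// yields 0.75, and subtracting 0.5 gives 0.25 -- the angle as a fraction of a
// full turn, which stays inside the [-1, 1] range the R700+ hardware units
// expect (per the comment above). R600-generation parts expect a [-pi, pi]
// input instead, hence the extra scaling by pi on that path.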

SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);

  SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for the 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps; the alternative is to
  // add a conditional to filter out the special case.

  SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
  Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);

  SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
  HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
  SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);

  SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
  SDValue LoBig = Zero;

  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
}
Jan Vesely900ff2e2014-06-18 12:27:15 +0000976SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
977 SDLoc DL(Op);
978 EVT VT = Op.getValueType();
979
980 SDValue Lo = Op.getOperand(0);
981 SDValue Hi = Op.getOperand(1);
982 SDValue Shift = Op.getOperand(2);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000983 SDValue Zero = DAG.getConstant(0, DL, VT);
984 SDValue One = DAG.getConstant(1, DL, VT);
Jan Vesely900ff2e2014-06-18 12:27:15 +0000985
Jan Veselyecf51332014-06-18 12:27:17 +0000986 const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;
987
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000988 SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
989 SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
Jan Vesely900ff2e2014-06-18 12:27:15 +0000990 SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
991 SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
992
993 // The dance around Width1 is necessary for 0 special case.
994 // Without it the CompShift might be 32, producing incorrect results in
995 // Overflow. So we do the shift in two steps, the alternative is to
996 // add a conditional to filter the special case.
997
998 SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
999 Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);
1000
Jan Veselyecf51332014-06-18 12:27:17 +00001001 SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
Jan Vesely900ff2e2014-06-18 12:27:15 +00001002 SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
1003 LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);
1004
Jan Veselyecf51332014-06-18 12:27:17 +00001005 SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
1006 SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;
Jan Vesely900ff2e2014-06-18 12:27:15 +00001007
1008 Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
1009 Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
1010
1011 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
1012}
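
// The right-shift expansion mirrors the left-shift one above; the only
// SRA-specific detail is the S >= 32 high part: a logical shift fills it with
// zero, while an arithmetic shift replicates the sign bit via Hi >> 31.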

SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
                                          unsigned mainop, unsigned ovf) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);

  SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
  // Extend sign.
  OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
                    DAG.getValueType(MVT::i1));

  SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
}
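
// LowerOperation maps ISD::UADDO to (ISD::ADD, AMDGPUISD::CARRY) and
// ISD::USUBO to (ISD::SUB, AMDGPUISD::BORROW), so the arithmetic result and
// the hardware carry/borrow bit come from separate nodes; the overflow bit is
// then sign-extended from i1 so both merged results share the i32 value type.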

SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(ISD::SETCC, DL, MVT::i1, Op,
                     DAG.getConstantFP(0.0f, DL, MVT::f32),
                     DAG.getCondCode(ISD::SETNE));
}
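
// This helper is only reached for i1 results (see the FP_TO_UINT handling in
// ReplaceNodeResults above); instead of emitting a real conversion it lowers
// the node to a SETNE comparison against 0.0, so any non-zero input yields
// true.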

SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   SDLoc DL,
                                                   unsigned DwordOffset) const {
  unsigned ByteOffset = DwordOffset * 4;
  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                           AMDGPUAS::CONSTANT_BUFFER_0);

  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)),
                     false, false, false, 0);
}
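
// The dword offsets passed in by the intrinsic lowering above correspond to
// the implicit kernel parameters stored in CONSTANT_BUFFER_0: dwords 0-2 hold
// ngroups.{x,y,z}, 3-5 global_size.{x,y,z} and 6-8 local_size.{x,y,z}
// (matching the r600_read_* cases in LowerOperation); further fields such as
// the grid dimension are located via getImplicitParameterOffset.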

bool R600TargetLowering::isZero(SDValue Op) const {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
    return Cst->isNullValue();
  } else if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CstFP->isZero();
  } else {
    return false;
  }
}

SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  if (VT == MVT::f32) {
    DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
    SDValue MinMax = CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
    if (MinMax)
      return MinMax;
  }

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
      ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
                                     Cond, Zero,
                                     True, False,
                                     DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far, it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, DL, CompareVT);
    HWFalse = DAG.getConstant(0, DL, CompareVT);
  } else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
                     Cond, HWFalse,
                     True, False,
                     DAG.getCondCode(ISD::SETNE));
}
1210
/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
/// convert these pointers to a register index. Each register holds
/// 16 bytes (4 x 32-bit sub-registers), but we need to take into account the
/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
/// for indirect addressing.
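///
/// Worked example (illustrative): with \p StackWidth == 2 each register index
/// covers two 32-bit sub-registers (8 bytes), so SRLPad == 3 and a byte
/// offset of 24 becomes register index 24 >> 3 == 3; with \p StackWidth == 4
/// a full 16-byte register is consumed per index and the shift is 4.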
1216SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1217 unsigned StackWidth,
1218 SelectionDAG &DAG) const {
1219 unsigned SRLPad;
1220 switch(StackWidth) {
1221 case 1:
1222 SRLPad = 2;
1223 break;
1224 case 2:
1225 SRLPad = 3;
1226 break;
1227 case 4:
1228 SRLPad = 4;
1229 break;
1230 default: llvm_unreachable("Invalid stack width");
1231 }
1232
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001233 SDLoc DL(Ptr);
1234 return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
1235 DAG.getConstant(SRLPad, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001236}
1237
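// Illustrative behaviour: for StackWidth == 2 the callers below walk the
// vector elements in order, and ElemIdx 0..3 yield the (Channel, PtrIncr)
// pairs (0,0), (1,0), (0,1), (1,0). PtrIncr is only non-zero at ElemIdx == 2
// because the pointer is advanced cumulatively inside the loops in
// LowerSTORE and LowerLOAD.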
1238void R600TargetLowering::getStackAddress(unsigned StackWidth,
1239 unsigned ElemIdx,
1240 unsigned &Channel,
1241 unsigned &PtrIncr) const {
1242 switch (StackWidth) {
1243 default:
1244 case 1:
1245 Channel = 0;
1246 if (ElemIdx > 0) {
1247 PtrIncr = 1;
1248 } else {
1249 PtrIncr = 0;
1250 }
1251 break;
1252 case 2:
1253 Channel = ElemIdx % 2;
1254 if (ElemIdx == 2) {
1255 PtrIncr = 1;
1256 } else {
1257 PtrIncr = 0;
1258 }
1259 break;
1260 case 4:
1261 Channel = ElemIdx;
1262 PtrIncr = 0;
1263 break;
1264 }
1265}
1266
Tom Stellard75aadc22012-12-11 21:25:42 +00001267SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001268 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001269 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1270 SDValue Chain = Op.getOperand(0);
1271 SDValue Value = Op.getOperand(1);
1272 SDValue Ptr = Op.getOperand(2);
1273
Ahmed Bougachaf8dfb472016-02-09 22:54:12 +00001274 if (SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG))
Tom Stellardfbab8272013-08-16 01:12:11 +00001275 return Result;
Tom Stellardfbab8272013-08-16 01:12:11 +00001276
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001277 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
1278 if (StoreNode->isTruncatingStore()) {
1279 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001280 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001281 EVT MemVT = StoreNode->getMemoryVT();
1282 SDValue MaskConstant;
1283 if (MemVT == MVT::i8) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001284 MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001285 } else {
1286 assert(MemVT == MVT::i16);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001287 MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001288 }
1289 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001290 DAG.getConstant(2, DL, MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001291 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001292 DAG.getConstant(0x00000003, DL, VT));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001293 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1294 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001295 DAG.getConstant(3, DL, VT));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001296 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1297 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
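      // Worked example (illustrative): an i8 store of 0xAB to byte address 6
      // gives DWordAddr = 1, ByteIndex = 2, Shift = 16, ShiftedValue =
      // 0x00AB0000 and Mask = 0x00FF0000, so the STORE_MSKOR below only
      // replaces the third byte of the destination dword.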
1298 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1299 // vector instead.
1300 SDValue Src[4] = {
1301 ShiftedValue,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001302 DAG.getConstant(0, DL, MVT::i32),
1303 DAG.getConstant(0, DL, MVT::i32),
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001304 Mask
1305 };
Craig Topper48d114b2014-04-26 18:35:24 +00001306 SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001307 SDValue Args[3] = { Chain, Input, DWordAddr };
1308 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
Craig Topper206fcd42014-04-26 19:29:41 +00001309 Op->getVTList(), Args, MemVT,
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001310 StoreNode->getMemOperand());
1311 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
1312 Value.getValueType().bitsGE(MVT::i32)) {
1313 // Convert pointer from byte address to dword address.
1314 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1315 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001316 Ptr, DAG.getConstant(2, DL, MVT::i32)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001317
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001318 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
Matt Arsenaulteaa3a7e2013-12-10 21:37:42 +00001319 llvm_unreachable("Truncated and indexed stores not supported yet");
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001320 } else {
1321 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1322 }
1323 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001324 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001325 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001326
1327 EVT ValueVT = Value.getValueType();
1328
Ahmed Bougachaf8dfb472016-02-09 22:54:12 +00001329 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001330 return SDValue();
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001331
Ahmed Bougachaf8dfb472016-02-09 22:54:12 +00001332 if (SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG))
Tom Stellarde9373602014-01-22 19:24:14 +00001333 return Ret;
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001334
Ahmed Bougachaf8dfb472016-02-09 22:54:12 +00001335 // Lowering for indirect addressing
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001336 const MachineFunction &MF = DAG.getMachineFunction();
Eric Christopher7792e322015-01-30 23:24:40 +00001337 const AMDGPUFrameLowering *TFL =
1338 static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001339 unsigned StackWidth = TFL->getStackWidth(MF);
1340
1341 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1342
1343 if (ValueVT.isVector()) {
1344 unsigned NumElemVT = ValueVT.getVectorNumElements();
1345 EVT ElemVT = ValueVT.getVectorElementType();
Craig Topper48d114b2014-04-26 18:35:24 +00001346 SmallVector<SDValue, 4> Stores(NumElemVT);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001347
    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in store");
1350
1351 for (unsigned i = 0; i < NumElemVT; ++i) {
1352 unsigned Channel, PtrIncr;
1353 getStackAddress(StackWidth, i, Channel, PtrIncr);
1354 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001355 DAG.getConstant(PtrIncr, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001356 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001357 Value, DAG.getConstant(i, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001358
1359 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1360 Chain, Elem, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001361 DAG.getTargetConstant(Channel, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001362 }
Craig Topper48d114b2014-04-26 18:35:24 +00001363 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001364 } else {
1365 if (ValueVT == MVT::i8) {
1366 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1367 }
1368 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001369 DAG.getTargetConstant(0, DL, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001370 }
1371
1372 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001373}
1374
// Returns 512 + (kc_bank << 12)
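// (e.g. CONSTANT_BUFFER_2 maps to 512 + 4096 * 2 == 8704); address spaces
// that are not constant buffers return -1 and are handled elsewhere.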
1376static int
1377ConstantAddressBlock(unsigned AddressSpace) {
1378 switch (AddressSpace) {
1379 case AMDGPUAS::CONSTANT_BUFFER_0:
1380 return 512;
1381 case AMDGPUAS::CONSTANT_BUFFER_1:
1382 return 512 + 4096;
1383 case AMDGPUAS::CONSTANT_BUFFER_2:
1384 return 512 + 4096 * 2;
1385 case AMDGPUAS::CONSTANT_BUFFER_3:
1386 return 512 + 4096 * 3;
1387 case AMDGPUAS::CONSTANT_BUFFER_4:
1388 return 512 + 4096 * 4;
1389 case AMDGPUAS::CONSTANT_BUFFER_5:
1390 return 512 + 4096 * 5;
1391 case AMDGPUAS::CONSTANT_BUFFER_6:
1392 return 512 + 4096 * 6;
1393 case AMDGPUAS::CONSTANT_BUFFER_7:
1394 return 512 + 4096 * 7;
1395 case AMDGPUAS::CONSTANT_BUFFER_8:
1396 return 512 + 4096 * 8;
1397 case AMDGPUAS::CONSTANT_BUFFER_9:
1398 return 512 + 4096 * 9;
1399 case AMDGPUAS::CONSTANT_BUFFER_10:
1400 return 512 + 4096 * 10;
1401 case AMDGPUAS::CONSTANT_BUFFER_11:
1402 return 512 + 4096 * 11;
1403 case AMDGPUAS::CONSTANT_BUFFER_12:
1404 return 512 + 4096 * 12;
1405 case AMDGPUAS::CONSTANT_BUFFER_13:
1406 return 512 + 4096 * 13;
1407 case AMDGPUAS::CONSTANT_BUFFER_14:
1408 return 512 + 4096 * 14;
1409 case AMDGPUAS::CONSTANT_BUFFER_15:
1410 return 512 + 4096 * 15;
1411 default:
1412 return -1;
1413 }
1414}
1415
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1418 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001419 SDLoc DL(Op);
Tom Stellard365366f2013-01-23 02:09:06 +00001420 LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
1421 SDValue Chain = Op.getOperand(0);
1422 SDValue Ptr = Op.getOperand(1);
1423 SDValue LoweredLoad;
1424
Matt Arsenault8b03e6c2015-07-09 18:47:03 +00001425 if (SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG))
1426 return Ret;
Tom Stellarde9373602014-01-22 19:24:14 +00001427
  // Lower loads from constant address space global variables.
1429 if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
Mehdi Aminia28d91d2015-03-10 02:37:25 +00001430 isa<GlobalVariable>(GetUnderlyingObject(
Mehdi Amini44ede332015-07-09 02:09:04 +00001431 LoadNode->getMemOperand()->getValue(), DAG.getDataLayout()))) {
Tom Stellard067c8152014-07-21 14:01:14 +00001432
Mehdi Amini44ede332015-07-09 02:09:04 +00001433 SDValue Ptr = DAG.getZExtOrTrunc(
1434 LoadNode->getBasePtr(), DL,
1435 getPointerTy(DAG.getDataLayout(), AMDGPUAS::PRIVATE_ADDRESS));
Tom Stellard067c8152014-07-21 14:01:14 +00001436 Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001437 DAG.getConstant(2, DL, MVT::i32));
Tom Stellard067c8152014-07-21 14:01:14 +00001438 return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op->getVTList(),
1439 LoadNode->getChain(), Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001440 DAG.getTargetConstant(0, DL, MVT::i32),
1441 Op.getOperand(2));
Tom Stellard067c8152014-07-21 14:01:14 +00001442 }
Tom Stellarde9373602014-01-22 19:24:14 +00001443
Tom Stellard35bb18c2013-08-26 15:06:04 +00001444 if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
1445 SDValue MergedValues[2] = {
Matt Arsenault83e60582014-07-24 17:10:35 +00001446 ScalarizeVectorLoad(Op, DAG),
Tom Stellard35bb18c2013-08-26 15:06:04 +00001447 Chain
1448 };
Craig Topper64941d92014-04-27 19:20:57 +00001449 return DAG.getMergeValues(MergedValues, DL);
Tom Stellard35bb18c2013-08-26 15:06:04 +00001450 }
1451
Tom Stellard365366f2013-01-23 02:09:06 +00001452 int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
Matt Arsenault00a0d6f2013-11-13 02:39:07 +00001453 if (ConstantBlock > -1 &&
1454 ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
1455 (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
Tom Stellard365366f2013-01-23 02:09:06 +00001456 SDValue Result;
Nick Lewyckyaad475b2014-04-15 07:22:52 +00001457 if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
1458 isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
Matt Arsenaultef1a9502013-11-01 17:39:26 +00001459 isa<ConstantSDNode>(Ptr)) {
Tom Stellard365366f2013-01-23 02:09:06 +00001460 SDValue Slots[4];
1461 for (unsigned i = 0; i < 4; i++) {
        // We want the constant position encoded with the following formula:
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is derived from Ptr, which llvm computes with an
        // alignment of 16. Thus we add (512 + (kc_bank << 12)) * 16 + chan * 4
        // here and then divide by 4 at the ISel step.
1467 SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001468 DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
Tom Stellard365366f2013-01-23 02:09:06 +00001469 Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
1470 }
Tom Stellard0344cdf2013-08-01 15:23:42 +00001471 EVT NewVT = MVT::v4i32;
1472 unsigned NumElements = 4;
1473 if (VT.isVector()) {
1474 NewVT = VT;
1475 NumElements = VT.getVectorNumElements();
1476 }
Craig Topper48d114b2014-04-26 18:35:24 +00001477 Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT,
Craig Topper2d2aa0c2014-04-30 07:17:30 +00001478 makeArrayRef(Slots, NumElements));
Tom Stellard365366f2013-01-23 02:09:06 +00001479 } else {
      // A non-constant Ptr can't be folded; keep it as a v4i32 CONST_ADDRESS load.
Tom Stellard365366f2013-01-23 02:09:06 +00001481 Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001482 DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
1483 DAG.getConstant(4, DL, MVT::i32)),
1484 DAG.getConstant(LoadNode->getAddressSpace() -
1485 AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
Tom Stellard365366f2013-01-23 02:09:06 +00001486 );
1487 }
1488
1489 if (!VT.isVector()) {
1490 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001491 DAG.getConstant(0, DL, MVT::i32));
Tom Stellard365366f2013-01-23 02:09:06 +00001492 }
1493
1494 SDValue MergedValues[2] = {
Matt Arsenault7939acd2014-04-07 16:44:24 +00001495 Result,
1496 Chain
Tom Stellard365366f2013-01-23 02:09:06 +00001497 };
Craig Topper64941d92014-04-27 19:20:57 +00001498 return DAG.getMergeValues(MergedValues, DL);
Tom Stellard365366f2013-01-23 02:09:06 +00001499 }
1500
  // For most operations, returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign-extended when it is uploaded to the
  // buffer. However, SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
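  //
  // A minimal sketch of that expansion: a sign-extending i8 load becomes a
  // plain extending load followed by SIGN_EXTEND_INREG with the original
  // memory type, i.e. roughly
  //
  //   %wide = extload i8 -> i32
  //   %res  = sign_extend_inreg %wide, i8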
Tom Stellard84021442013-07-23 01:48:24 +00001508 if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
1509 EVT MemVT = LoadNode->getMemoryVT();
1510 assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
Tom Stellard84021442013-07-23 01:48:24 +00001511 SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
1512 LoadNode->getPointerInfo(), MemVT,
1513 LoadNode->isVolatile(),
1514 LoadNode->isNonTemporal(),
Louis Gerbarg67474e32014-07-31 21:45:05 +00001515 LoadNode->isInvariant(),
Tom Stellard84021442013-07-23 01:48:24 +00001516 LoadNode->getAlignment());
Jan Veselyb670d372015-05-26 18:07:22 +00001517 SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
1518 DAG.getValueType(MemVT));
Tom Stellard84021442013-07-23 01:48:24 +00001519
Jan Veselyb670d372015-05-26 18:07:22 +00001520 SDValue MergedValues[2] = { Res, Chain };
Craig Topper64941d92014-04-27 19:20:57 +00001521 return DAG.getMergeValues(MergedValues, DL);
Tom Stellard84021442013-07-23 01:48:24 +00001522 }
1523
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001524 if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1525 return SDValue();
1526 }
1527
1528 // Lowering for indirect addressing
1529 const MachineFunction &MF = DAG.getMachineFunction();
Eric Christopher7792e322015-01-30 23:24:40 +00001530 const AMDGPUFrameLowering *TFL =
1531 static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001532 unsigned StackWidth = TFL->getStackWidth(MF);
1533
1534 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1535
1536 if (VT.isVector()) {
1537 unsigned NumElemVT = VT.getVectorNumElements();
1538 EVT ElemVT = VT.getVectorElementType();
1539 SDValue Loads[4];
1540
1541 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1542 "vector width in load");
1543
1544 for (unsigned i = 0; i < NumElemVT; ++i) {
1545 unsigned Channel, PtrIncr;
1546 getStackAddress(StackWidth, i, Channel, PtrIncr);
1547 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001548 DAG.getConstant(PtrIncr, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001549 Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
1550 Chain, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001551 DAG.getTargetConstant(Channel, DL, MVT::i32),
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001552 Op.getOperand(2));
1553 }
1554 for (unsigned i = NumElemVT; i < 4; ++i) {
1555 Loads[i] = DAG.getUNDEF(ElemVT);
1556 }
1557 EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
Craig Topper48d114b2014-04-26 18:35:24 +00001558 LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001559 } else {
1560 LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
1561 Chain, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001562 DAG.getTargetConstant(0, DL, MVT::i32), // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001563 Op.getOperand(2));
1564 }
1565
Matt Arsenault7939acd2014-04-07 16:44:24 +00001566 SDValue Ops[2] = {
1567 LoweredLoad,
1568 Chain
1569 };
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001570
Craig Topper64941d92014-04-27 19:20:57 +00001571 return DAG.getMergeValues(Ops, DL);
Tom Stellard365366f2013-01-23 02:09:06 +00001572}
Tom Stellard75aadc22012-12-11 21:25:42 +00001573
Matt Arsenault1d555c42014-06-23 18:00:55 +00001574SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1575 SDValue Chain = Op.getOperand(0);
1576 SDValue Cond = Op.getOperand(1);
1577 SDValue Jump = Op.getOperand(2);
1578
1579 return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1580 Chain, Jump, Cond);
1581}
1582
Tom Stellard75aadc22012-12-11 21:25:42 +00001583/// XXX Only kernel functions are supported, so we can assume for now that
1584/// every function is a kernel function, but in the future we should use
1585/// separate calling conventions for kernel and non-kernel functions.
1586SDValue R600TargetLowering::LowerFormalArguments(
1587 SDValue Chain,
1588 CallingConv::ID CallConv,
1589 bool isVarArg,
1590 const SmallVectorImpl<ISD::InputArg> &Ins,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001591 SDLoc DL, SelectionDAG &DAG,
Tom Stellard75aadc22012-12-11 21:25:42 +00001592 SmallVectorImpl<SDValue> &InVals) const {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001593 SmallVector<CCValAssign, 16> ArgLocs;
Eric Christopherb5217502014-08-06 18:45:26 +00001594 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1595 *DAG.getContext());
Vincent Lejeunef143af32013-11-11 22:10:24 +00001596 MachineFunction &MF = DAG.getMachineFunction();
Jan Veselye5121f32014-10-14 20:05:26 +00001597 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellardacfeebf2013-07-23 01:48:05 +00001598
Tom Stellardaf775432013-10-23 00:44:32 +00001599 SmallVector<ISD::InputArg, 8> LocalIns;
1600
Matt Arsenault209a7b92014-04-18 07:40:20 +00001601 getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);
Tom Stellardaf775432013-10-23 00:44:32 +00001602
1603 AnalyzeFormalArguments(CCInfo, LocalIns);
Tom Stellardacfeebf2013-07-23 01:48:05 +00001604
Tom Stellard1e803092013-07-23 01:48:18 +00001605 for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001606 CCValAssign &VA = ArgLocs[i];
Matt Arsenault74ef2772014-08-13 18:14:11 +00001607 const ISD::InputArg &In = Ins[i];
1608 EVT VT = In.VT;
1609 EVT MemVT = VA.getLocVT();
1610 if (!VT.isVector() && MemVT.isVector()) {
1611 // Get load source type if scalarized.
1612 MemVT = MemVT.getVectorElementType();
1613 }
Tom Stellard78e01292013-07-23 01:47:58 +00001614
Jan Veselye5121f32014-10-14 20:05:26 +00001615 if (MFI->getShaderType() != ShaderType::COMPUTE) {
Vincent Lejeunef143af32013-11-11 22:10:24 +00001616 unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
1617 SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
1618 InVals.push_back(Register);
1619 continue;
1620 }
1621
Tom Stellard75aadc22012-12-11 21:25:42 +00001622 PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Matt Arsenault74ef2772014-08-13 18:14:11 +00001623 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellardacfeebf2013-07-23 01:48:05 +00001624
Matt Arsenaultfae02982014-03-17 18:58:11 +00001625 // i64 isn't a legal type, so the register type used ends up as i32, which
1626 // isn't expected here. It attempts to create this sextload, but it ends up
1627 // being invalid. Somehow this seems to work with i64 arguments, but breaks
1628 // for <1 x i64>.
1629
Tom Stellardacfeebf2013-07-23 01:48:05 +00001630 // The first 36 bytes of the input buffer contains information about
1631 // thread group and global sizes.
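    // As an illustrative example, a kernel taking (i32, i32) would therefore
    // read its two user arguments from constant-buffer offsets 36 and 40
    // (each LocMemOffset below is shifted by this 36-byte implicit header).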
Matt Arsenault74ef2772014-08-13 18:14:11 +00001632 ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
1633 if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
1634 // FIXME: This should really check the extload type, but the handling of
1635 // extload vector parameters seems to be broken.
Matt Arsenaulte1f030c2014-04-11 20:59:54 +00001636
Matt Arsenault74ef2772014-08-13 18:14:11 +00001637 // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
1638 Ext = ISD::SEXTLOAD;
1639 }
1640
1641 // Compute the offset from the value.
1642 // XXX - I think PartOffset should give you this, but it seems to give the
1643 // size of the register which isn't useful.
1644
Andrew Trick05938a52015-02-16 18:10:47 +00001645 unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
Matt Arsenault74ef2772014-08-13 18:14:11 +00001646 unsigned PartOffset = VA.getLocMemOffset();
Jan Veselye5121f32014-10-14 20:05:26 +00001647 unsigned Offset = 36 + VA.getLocMemOffset();
Matt Arsenault74ef2772014-08-13 18:14:11 +00001648
1649 MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
1650 SDValue Arg = DAG.getLoad(ISD::UNINDEXED, Ext, VT, DL, Chain,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001651 DAG.getConstant(Offset, DL, MVT::i32),
Matt Arsenault74ef2772014-08-13 18:14:11 +00001652 DAG.getUNDEF(MVT::i32),
1653 PtrInfo,
1654 MemVT, false, true, true, 4);
Matt Arsenault209a7b92014-04-18 07:40:20 +00001655
1656 // 4 is the preferred alignment for the CONSTANT memory space.
Tom Stellard75aadc22012-12-11 21:25:42 +00001657 InVals.push_back(Arg);
Jan Veselye5121f32014-10-14 20:05:26 +00001658 MFI->ABIArgOffset = Offset + MemVT.getStoreSize();
Tom Stellard75aadc22012-12-11 21:25:42 +00001659 }
1660 return Chain;
1661}
1662
Mehdi Amini44ede332015-07-09 02:09:04 +00001663EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1664 EVT VT) const {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001665 if (!VT.isVector())
1666 return MVT::i32;
Tom Stellard75aadc22012-12-11 21:25:42 +00001667 return VT.changeVectorElementTypeToInteger();
1668}
1669
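// Replaces build_vector operands that are known to be 0.0, 1.0, undef, or a
// duplicate of an earlier lane with undef, recording the corresponding
// swizzle selects. Illustrative example: (x, 0.0, 1.0, x) is remapped to
// {1 -> SEL_0, 2 -> SEL_1, 3 -> 0} (the last lane reuses lane 0).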
Matt Arsenault209a7b92014-04-18 07:40:20 +00001670static SDValue CompactSwizzlableVector(
1671 SelectionDAG &DAG, SDValue VectorEntry,
1672 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001673 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1674 assert(RemapSwizzle.empty());
1675 SDValue NewBldVec[4] = {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001676 VectorEntry.getOperand(0),
1677 VectorEntry.getOperand(1),
1678 VectorEntry.getOperand(2),
1679 VectorEntry.getOperand(3)
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001680 };
1681
1682 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeunefa58a5f2013-10-13 17:56:10 +00001683 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      // We mask the write here to teach later passes that the ith element of
      // this vector is undef. Thus we can use it to reduce 128-bit register
      // usage, break false dependencies and additionally make assembly easier
      // to read.
1687 RemapSwizzle[i] = 7; // SEL_MASK_WRITE
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001688 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1689 if (C->isZero()) {
1690 RemapSwizzle[i] = 4; // SEL_0
1691 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1692 } else if (C->isExactlyValue(1.0)) {
1693 RemapSwizzle[i] = 5; // SEL_1
1694 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1695 }
1696 }
1697
1698 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1699 continue;
1700 for (unsigned j = 0; j < i; j++) {
1701 if (NewBldVec[i] == NewBldVec[j]) {
1702 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1703 RemapSwizzle[i] = j;
1704 break;
1705 }
1706 }
1707 }
1708
1709 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001710 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001711}
1712
Benjamin Kramer193960c2013-06-11 13:32:25 +00001713static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1714 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001715 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1716 assert(RemapSwizzle.empty());
1717 SDValue NewBldVec[4] = {
1718 VectorEntry.getOperand(0),
1719 VectorEntry.getOperand(1),
1720 VectorEntry.getOperand(2),
1721 VectorEntry.getOperand(3)
1722 };
1723 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001724 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001725 RemapSwizzle[i] = i;
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001726 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
          ->getZExtValue();
1729 if (i == Idx)
1730 isUnmovable[Idx] = true;
1731 }
1732 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001733
1734 for (unsigned i = 0; i < 4; i++) {
1735 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
          ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001738 if (isUnmovable[Idx])
1739 continue;
1740 // Swap i and Idx
1741 std::swap(NewBldVec[Idx], NewBldVec[i]);
1742 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1743 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001744 }
1745 }
1746
1747 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001748 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001749}
1750
1751
1752SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001753 SDValue Swz[4], SelectionDAG &DAG,
1754 SDLoc DL) const {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001755 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1756 // Old -> New swizzle values
1757 DenseMap<unsigned, unsigned> SwizzleRemap;
1758
1759 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1760 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001761 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001762 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001763 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001764 }
1765
1766 SwizzleRemap.clear();
1767 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1768 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001769 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001770 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001771 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001772 }
1773
1774 return BuildVector;
1775}
1776
1777
Tom Stellard75aadc22012-12-11 21:25:42 +00001778//===----------------------------------------------------------------------===//
1779// Custom DAG Optimizations
1780//===----------------------------------------------------------------------===//
1781
1782SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1783 DAGCombinerInfo &DCI) const {
1784 SelectionDAG &DAG = DCI.DAG;
1785
1786 switch (N->getOpcode()) {
Tom Stellard50122a52014-04-07 19:45:41 +00001787 default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00001788 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1789 case ISD::FP_ROUND: {
1790 SDValue Arg = N->getOperand(0);
1791 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001792 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001793 Arg.getOperand(0));
1794 }
1795 break;
1796 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001797
1798 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1799 // (i32 select_cc f32, f32, -1, 0 cc)
1800 //
1801 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1802 // this to one of the SET*_DX10 instructions.
1803 case ISD::FP_TO_SINT: {
1804 SDValue FNeg = N->getOperand(0);
1805 if (FNeg.getOpcode() != ISD::FNEG) {
1806 return SDValue();
1807 }
1808 SDValue SelectCC = FNeg.getOperand(0);
1809 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1810 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1811 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1812 !isHWTrueValue(SelectCC.getOperand(2)) ||
1813 !isHWFalseValue(SelectCC.getOperand(3))) {
1814 return SDValue();
1815 }
1816
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001817 SDLoc dl(N);
1818 return DAG.getNode(ISD::SELECT_CC, dl, N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001819 SelectCC.getOperand(0), // LHS
1820 SelectCC.getOperand(1), // RHS
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001821 DAG.getConstant(-1, dl, MVT::i32), // True
1822 DAG.getConstant(0, dl, MVT::i32), // False
Tom Stellarde06163a2013-02-07 14:02:35 +00001823 SelectCC.getOperand(4)); // CC
1824
1825 break;
1826 }
Quentin Colombete2e05482013-07-30 00:27:16 +00001827
NAKAMURA Takumi8a046432013-10-28 04:07:38 +00001828 // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1829 // => build_vector elt0, ... , NewEltIdx, ... , eltN
Quentin Colombete2e05482013-07-30 00:27:16 +00001830 case ISD::INSERT_VECTOR_ELT: {
1831 SDValue InVec = N->getOperand(0);
1832 SDValue InVal = N->getOperand(1);
1833 SDValue EltNo = N->getOperand(2);
1834 SDLoc dl(N);
1835
1836 // If the inserted element is an UNDEF, just use the input vector.
1837 if (InVal.getOpcode() == ISD::UNDEF)
1838 return InVec;
1839
1840 EVT VT = InVec.getValueType();
1841
1842 // If we can't generate a legal BUILD_VECTOR, exit
1843 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1844 return SDValue();
1845
1846 // Check that we know which element is being inserted
1847 if (!isa<ConstantSDNode>(EltNo))
1848 return SDValue();
1849 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1850
1851 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1852 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1853 // vector elements.
1854 SmallVector<SDValue, 8> Ops;
1855 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1856 Ops.append(InVec.getNode()->op_begin(),
1857 InVec.getNode()->op_end());
1858 } else if (InVec.getOpcode() == ISD::UNDEF) {
1859 unsigned NElts = VT.getVectorNumElements();
1860 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1861 } else {
1862 return SDValue();
1863 }
1864
1865 // Insert the element
1866 if (Elt < Ops.size()) {
1867 // All the operands of BUILD_VECTOR must have the same type;
1868 // we enforce that here.
1869 EVT OpVT = Ops[0].getValueType();
1870 if (InVal.getValueType() != OpVT)
1871 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1872 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
1873 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
1874 Ops[Elt] = InVal;
1875 }
1876
1877 // Return the new vector
Craig Topper48d114b2014-04-26 18:35:24 +00001878 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
Quentin Colombete2e05482013-07-30 00:27:16 +00001879 }
1880
  // Extract_vec (Build_vector) generated by custom lowering
  // also needs to be custom-combined here.
1883 case ISD::EXTRACT_VECTOR_ELT: {
1884 SDValue Arg = N->getOperand(0);
1885 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1886 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1887 unsigned Element = Const->getZExtValue();
1888 return Arg->getOperand(Element);
1889 }
1890 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001891 if (Arg.getOpcode() == ISD::BITCAST &&
1892 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1893 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1894 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001895 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00001896 Arg->getOperand(0).getOperand(Element));
1897 }
1898 }
Mehdi Aminie029eae2015-07-16 06:23:12 +00001899 break;
Tom Stellard365366f2013-01-23 02:09:06 +00001900 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001901
1902 case ISD::SELECT_CC: {
Tom Stellardafa8b532014-05-09 16:42:16 +00001903 // Try common optimizations
Ahmed Bougachaf8dfb472016-02-09 22:54:12 +00001904 if (SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI))
Tom Stellardafa8b532014-05-09 16:42:16 +00001905 return Ret;
1906
Tom Stellarde06163a2013-02-07 14:02:35 +00001907 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1908 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001909 //
1910 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1911 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001912 SDValue LHS = N->getOperand(0);
1913 if (LHS.getOpcode() != ISD::SELECT_CC) {
1914 return SDValue();
1915 }
1916
1917 SDValue RHS = N->getOperand(1);
1918 SDValue True = N->getOperand(2);
1919 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001920 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001921
1922 if (LHS.getOperand(2).getNode() != True.getNode() ||
1923 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00001924 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00001925 return SDValue();
1926 }
1927
Tom Stellard5e524892013-03-08 15:37:11 +00001928 switch (NCC) {
1929 default: return SDValue();
1930 case ISD::SETNE: return LHS;
1931 case ISD::SETEQ: {
1932 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1933 LHSCC = ISD::getSetCCInverse(LHSCC,
1934 LHS.getOperand(0).getValueType().isInteger());
Tom Stellardcd428182013-09-28 02:50:38 +00001935 if (DCI.isBeforeLegalizeOps() ||
1936 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1937 return DAG.getSelectCC(SDLoc(N),
1938 LHS.getOperand(0),
1939 LHS.getOperand(1),
1940 LHS.getOperand(2),
1941 LHS.getOperand(3),
1942 LHSCC);
1943 break;
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001944 }
Tom Stellard5e524892013-03-08 15:37:11 +00001945 }
Tom Stellardcd428182013-09-28 02:50:38 +00001946 return SDValue();
Tom Stellard5e524892013-03-08 15:37:11 +00001947 }
Tom Stellardfbab8272013-08-16 01:12:11 +00001948
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001949 case AMDGPUISD::EXPORT: {
1950 SDValue Arg = N->getOperand(1);
1951 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1952 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001953
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001954 SDValue NewArgs[8] = {
1955 N->getOperand(0), // Chain
1956 SDValue(),
1957 N->getOperand(2), // ArrayBase
1958 N->getOperand(3), // Type
1959 N->getOperand(4), // SWZ_X
1960 N->getOperand(5), // SWZ_Y
1961 N->getOperand(6), // SWZ_Z
1962 N->getOperand(7) // SWZ_W
1963 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00001964 SDLoc DL(N);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001965 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
Craig Topper48d114b2014-04-26 18:35:24 +00001966 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
Tom Stellarde06163a2013-02-07 14:02:35 +00001967 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001968 case AMDGPUISD::TEXTURE_FETCH: {
1969 SDValue Arg = N->getOperand(1);
1970 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1971 break;
1972
1973 SDValue NewArgs[19] = {
1974 N->getOperand(0),
1975 N->getOperand(1),
1976 N->getOperand(2),
1977 N->getOperand(3),
1978 N->getOperand(4),
1979 N->getOperand(5),
1980 N->getOperand(6),
1981 N->getOperand(7),
1982 N->getOperand(8),
1983 N->getOperand(9),
1984 N->getOperand(10),
1985 N->getOperand(11),
1986 N->getOperand(12),
1987 N->getOperand(13),
1988 N->getOperand(14),
1989 N->getOperand(15),
1990 N->getOperand(16),
1991 N->getOperand(17),
1992 N->getOperand(18),
1993 };
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001994 SDLoc DL(N);
1995 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
1996 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001997 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001998 }
Matt Arsenault5565f65e2014-05-22 18:09:07 +00001999
2000 return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00002001}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002002
2003static bool
2004FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002005 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Eric Christopherfc6de422014-08-05 02:39:49 +00002006 const R600InstrInfo *TII =
2007 static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002008 if (!Src.isMachineOpcode())
2009 return false;
2010 switch (Src.getMachineOpcode()) {
2011 case AMDGPU::FNEG_R600:
2012 if (!Neg.getNode())
2013 return false;
2014 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002015 Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002016 return true;
2017 case AMDGPU::FABS_R600:
2018 if (!Abs.getNode())
2019 return false;
2020 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002021 Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002022 return true;
2023 case AMDGPU::CONST_COPY: {
2024 unsigned Opcode = ParentNode->getMachineOpcode();
2025 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2026
2027 if (!Sel.getNode())
2028 return false;
2029
2030 SDValue CstOffset = Src.getOperand(0);
2031 if (ParentNode->getValueType(0).isVector())
2032 return false;
2033
2034 // Gather constants values
2035 int SrcIndices[] = {
2036 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2037 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2038 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
2039 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2040 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2041 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2042 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2043 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2044 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2045 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2046 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
2047 };
2048 std::vector<unsigned> Consts;
Matt Arsenault4d64f962014-05-12 19:23:21 +00002049 for (int OtherSrcIdx : SrcIndices) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002050 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2051 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2052 continue;
2053 if (HasDst) {
2054 OtherSrcIdx--;
2055 OtherSelIdx--;
2056 }
2057 if (RegisterSDNode *Reg =
2058 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2059 if (Reg->getReg() == AMDGPU::ALU_CONST) {
Matt Arsenaultb3ee3882014-05-12 19:26:38 +00002060 ConstantSDNode *Cst
2061 = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002062 Consts.push_back(Cst->getZExtValue());
2063 }
2064 }
2065 }
2066
Matt Arsenault37c12d72014-05-12 20:42:57 +00002067 ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002068 Consts.push_back(Cst->getZExtValue());
2069 if (!TII->fitsConstReadLimitations(Consts)) {
2070 return false;
2071 }
2072
2073 Sel = CstOffset;
2074 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
2075 return true;
2076 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002077 case AMDGPU::MOV_IMM_I32:
2078 case AMDGPU::MOV_IMM_F32: {
2079 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
    uint64_t ImmValue = 0;

2083 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
      ConstantFPSDNode *FPC = cast<ConstantFPSDNode>(Src.getOperand(0));
2085 float FloatValue = FPC->getValueAPF().convertToFloat();
2086 if (FloatValue == 0.0) {
2087 ImmReg = AMDGPU::ZERO;
2088 } else if (FloatValue == 0.5) {
2089 ImmReg = AMDGPU::HALF;
2090 } else if (FloatValue == 1.0) {
2091 ImmReg = AMDGPU::ONE;
2092 } else {
2093 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2094 }
2095 } else {
      ConstantSDNode *C = cast<ConstantSDNode>(Src.getOperand(0));
2097 uint64_t Value = C->getZExtValue();
2098 if (Value == 0) {
2099 ImmReg = AMDGPU::ZERO;
2100 } else if (Value == 1) {
2101 ImmReg = AMDGPU::ONE_INT;
2102 } else {
2103 ImmValue = Value;
2104 }
2105 }
2106
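    // Illustrative example: MOV_IMM_F32 0.5 folds to the dedicated HALF
    // register, while MOV_IMM_F32 2.0 has no inline register and stays an
    // ALU_LITERAL_X with ImmValue = 0x40000000 (the IEEE-754 bits of 2.0f).
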
2107 // Check that we aren't already using an immediate.
2108 // XXX: It's possible for an instruction to have more than one
2109 // immediate operand, but this is not supported yet.
2110 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
2111 if (!Imm.getNode())
2112 return false;
2113 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
2114 assert(C);
2115 if (C->getZExtValue())
2116 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002117 Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002118 }
2119 Src = DAG.getRegister(ImmReg, MVT::i32);
2120 return true;
2121 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002122 default:
2123 return false;
2124 }
2125}
2126
2127
2128/// \brief Fold the instructions after selecting them
2129SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2130 SelectionDAG &DAG) const {
Eric Christopherfc6de422014-08-05 02:39:49 +00002131 const R600InstrInfo *TII =
2132 static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002133 if (!Node->isMachineOpcode())
2134 return Node;
2135 unsigned Opcode = Node->getMachineOpcode();
2136 SDValue FakeOp;
2137
Benjamin Kramer6cd780f2015-02-17 15:29:18 +00002138 std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002139
2140 if (Opcode == AMDGPU::DOT_4) {
2141 int OperandIdx[] = {
2142 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2143 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2144 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2145 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2146 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2147 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2148 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2149 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00002150 };
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002151 int NegIdx[] = {
2152 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
2153 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
2154 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
2155 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
2156 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
2157 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
2158 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
2159 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
2160 };
2161 int AbsIdx[] = {
2162 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
2163 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
2164 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
2165 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
2166 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
2167 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
2168 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
2169 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
2170 };
2171 for (unsigned i = 0; i < 8; i++) {
2172 if (OperandIdx[i] < 0)
2173 return Node;
2174 SDValue &Src = Ops[OperandIdx[i] - 1];
2175 SDValue &Neg = Ops[NegIdx[i] - 1];
2176 SDValue &Abs = Ops[AbsIdx[i] - 1];
2177 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2178 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2179 if (HasDst)
2180 SelIdx--;
2181 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002182 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2183 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2184 }
2185 } else if (Opcode == AMDGPU::REG_SEQUENCE) {
2186 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2187 SDValue &Src = Ops[i];
2188 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002189 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2190 }
Vincent Lejeune0167a312013-09-12 23:45:00 +00002191 } else if (Opcode == AMDGPU::CLAMP_R600) {
2192 SDValue Src = Node->getOperand(0);
2193 if (!Src.isMachineOpcode() ||
2194 !TII->hasInstrModifiers(Src.getMachineOpcode()))
2195 return Node;
2196 int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
2197 AMDGPU::OpName::clamp);
2198 if (ClampIdx < 0)
2199 return Node;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002200 SDLoc DL(Node);
Benjamin Kramer6cd780f2015-02-17 15:29:18 +00002201 std::vector<SDValue> Ops(Src->op_begin(), Src->op_end());
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002202 Ops[ClampIdx - 1] = DAG.getTargetConstant(1, DL, MVT::i32);
2203 return DAG.getMachineNode(Src.getMachineOpcode(), DL,
2204 Node->getVTList(), Ops);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002205 } else {
2206 if (!TII->hasInstrModifiers(Opcode))
2207 return Node;
2208 int OperandIdx[] = {
2209 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2210 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2211 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
2212 };
2213 int NegIdx[] = {
2214 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
2215 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
2216 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
2217 };
2218 int AbsIdx[] = {
2219 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
2220 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
2221 -1
2222 };
2223 for (unsigned i = 0; i < 3; i++) {
2224 if (OperandIdx[i] < 0)
2225 return Node;
2226 SDValue &Src = Ops[OperandIdx[i] - 1];
2227 SDValue &Neg = Ops[NegIdx[i] - 1];
2228 SDValue FakeAbs;
2229 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2230 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2231 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002232 int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
2233 if (HasDst) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002234 SelIdx--;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002235 ImmIdx--;
2236 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002237 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002238 SDValue &Imm = Ops[ImmIdx];
2239 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002240 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2241 }
2242 }
2243
2244 return Node;
2245}