blob: 92deda1502ff96f9c6efedf7e74f0ed96c13d017 [file] [log] [blame]
Tom Stellard75aadc22012-12-11 21:25:42 +00001//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Custom DAG lowering for R600
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000016#include "AMDGPUFrameLowering.h"
Matt Arsenaultc791f392014-06-23 18:00:31 +000017#include "AMDGPUIntrinsicInfo.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000018#include "AMDGPUSubtarget.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000019#include "R600Defines.h"
20#include "R600InstrInfo.h"
21#include "R600MachineFunctionInfo.h"
Tom Stellard067c8152014-07-21 14:01:14 +000022#include "llvm/Analysis/ValueTracking.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000023#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000024#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000025#include "llvm/CodeGen/MachineInstrBuilder.h"
26#include "llvm/CodeGen/MachineRegisterInfo.h"
27#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000028#include "llvm/IR/Argument.h"
29#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000030
31using namespace llvm;
32
/// Constructor: registers the R600 register classes and configures how each
/// (opcode, type) pair is legalized for the R600 DAG legalizer.  The bulk of
/// this is declarative table setup; note that several actions are gated on
/// subtarget features (CARRY/BORROW/BFE).
R600TargetLowering::R600TargetLowering(TargetMachine &TM,
                                       const AMDGPUSubtarget &STI)
    : AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) {
  // Scalar values live in 32-bit GPRs; 2- and 4-element vectors map onto
  // 64-bit and 128-bit register tuples respectively.
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
  addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);

  computeRegisterProperties(STI.getRegisterInfo());

  // Set condition code actions.  Condition codes the hardware cannot encode
  // directly are expanded into supported ones by the legalizer.
  setCondCodeAction(ISD::SETO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::f32, Expand);

  setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::i32, Expand);

  // Trig is custom-lowered (see LowerTrig) to handle hardware range limits.
  setOperationAction(ISD::FCOS, MVT::f32, Custom);
  setOperationAction(ISD::FSIN, MVT::f32, Custom);

  setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
  setOperationAction(ISD::SETCC, MVT::v2i32, Expand);

  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  // Plain SELECT is expanded into SELECT_CC, which is custom-lowered above.
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
  setOperationAction(ISD::SELECT, MVT::v4i32, Expand);

  // ADD, SUB overflow.
  // TODO: turn these into Legal?
  if (Subtarget->hasCARRY())
    setOperationAction(ISD::UADDO, MVT::i32, Custom);

  if (Subtarget->hasBORROW())
    setOperationAction(ISD::USUBO, MVT::i32, Custom);

  // Expand sign extension of vectors.  Scalar SIGN_EXTEND_INREG can be
  // selected to BFE when the subtarget supports it; otherwise expand.
  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);


  // Legalize loads and stores to the private address space.
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);

  // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
  // spaces, so it is custom lowered to handle those where it isn't.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
  }

  setOperationAction(ISD::STORE, MVT::i8, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);
  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
  setTruncStoreAction(MVT::i32, MVT::i16, Custom);

  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);

  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);

  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

  // Target-specific DAG combines (see PerformDAGCombine).
  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::FP_TO_SINT);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::SELECT_CC);
  setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);

  // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
  // to be Legal/Custom in order to avoid library calls.
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);

  // No hardware add/sub with carry; expand to the CARRY/BORROW forms.
  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
  for (MVT VT : ScalarIntVTs) {
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::SUBE, VT, Expand);
  }

  setSchedulingPreference(Sched::Source);
}
192
Tom Stellardc0f0fba2015-10-01 17:51:29 +0000193static inline bool isEOP(MachineBasicBlock::iterator I) {
194 return std::next(I)->getOpcode() == AMDGPU::RETURN;
195}
196
/// Expand pseudo instructions that need MachineInstr-level custom insertion
/// after instruction selection: flag-carrying moves (clamp/abs/neg/mask),
/// immediate moves, RAT stores, gradient texture sampling (TXD), branches,
/// exports and RETURN live-out bookkeeping.  Returns the block containing the
/// expanded code; the pseudo itself is erased unless an early return keeps it.
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(Subtarget->getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // Replace LDS_*_RET instruction that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI->getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
      // LDS_1A2D support and remove this special case.
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()) ||
          MI->getOpcode() == AMDGPU::LDS_CMPST_RET)
        return BB;

      // Rebuild as the NORET form, dropping the (dead) destination operand.
      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  // CLAMP/FABS/FNEG lower to a plain MOV carrying the matching modifier flag.
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    // Mark the instruction that defines the masked register with the MASK
    // flag instead of emitting anything new.
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    // FP immediates are materialized via their raw bit pattern.
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    // Read from the constant file: MOV from ALU_CONST with the constant
    // selector encoded in src0_sel.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
                       MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(isEOP(I)); // Set End of program bit
    break;
  }
  case AMDGPU::RAT_STORE_TYPED_eg: {
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addOperand(MI->getOperand(2))
        .addImm(isEOP(I)); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    // Gradient sample: load the H and V gradients into temporary 128-bit
    // registers, then issue TEX_SAMPLE_G with implicit uses of both.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    // Adjust coordinate swizzle and coordinate-type bits per texture target.
    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
        .addOperand(MI->getOperand(3))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
        .addOperand(MI->getOperand(2))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW)
        .addReg(T0, RegState::Implicit)
        .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    // Same as TXD but issues the shadow-compare variant TEX_SAMPLE_C_G.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
        .addOperand(MI->getOperand(3))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
        .addOperand(MI->getOperand(2))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW)
        .addReg(T0, RegState::Implicit)
        .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
        .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    // Conditional branch: set PREDICATE_BIT with a pushed PRED_X compare,
    // then jump on the predicate (killed here).
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .addOperand(MI->getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    // Integer variant of the conditional branch above.
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO_INT)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .addOperand(MI->getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if its not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    // Scan the rest of the block for a later export of the same type.
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = isEOP(I);
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // CF instruction encoding differs between Evergreen (84) and R600 (40).
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addOperand(MI->getOperand(2))
        .addOperand(MI->getOperand(3))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5))
        .addOperand(MI->getOperand(6))
        .addImm(CfInst)
        .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  // The pseudo has been fully expanded; remove it.
  MI->eraseFromParent();
  return BB;
}
582
583//===----------------------------------------------------------------------===//
584// Custom DAG Lowering Operations
585//===----------------------------------------------------------------------===//
586
Tom Stellard75aadc22012-12-11 21:25:42 +0000587SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000588 MachineFunction &MF = DAG.getMachineFunction();
589 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard75aadc22012-12-11 21:25:42 +0000590 switch (Op.getOpcode()) {
591 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Tom Stellard880a80a2014-06-17 16:53:14 +0000592 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
593 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
Jan Vesely25f36272014-06-18 12:27:13 +0000594 case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
Jan Veselyecf51332014-06-18 12:27:17 +0000595 case ISD::SRA_PARTS:
Jan Vesely900ff2e2014-06-18 12:27:15 +0000596 case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
Jan Vesely808fff52015-04-30 17:15:56 +0000597 case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
598 case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000599 case ISD::FCOS:
600 case ISD::FSIN: return LowerTrig(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000601 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000602 case ISD::STORE: return LowerSTORE(Op, DAG);
Matt Arsenaultd2c9e082014-07-07 18:34:45 +0000603 case ISD::LOAD: {
604 SDValue Result = LowerLOAD(Op, DAG);
605 assert((!Result.getNode() ||
606 Result.getNode()->getNumValues() == 2) &&
607 "Load should return a value and a chain");
608 return Result;
609 }
610
Matt Arsenault1d555c42014-06-23 18:00:55 +0000611 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000612 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000613 case ISD::INTRINSIC_VOID: {
614 SDValue Chain = Op.getOperand(0);
615 unsigned IntrinsicID =
616 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
617 switch (IntrinsicID) {
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000618 case AMDGPUIntrinsic::R600_store_swizzle: {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000619 SDLoc DL(Op);
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000620 const SDValue Args[8] = {
621 Chain,
622 Op.getOperand(2), // Export Value
623 Op.getOperand(3), // ArrayBase
624 Op.getOperand(4), // Type
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000625 DAG.getConstant(0, DL, MVT::i32), // SWZ_X
626 DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
627 DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
628 DAG.getConstant(3, DL, MVT::i32) // SWZ_W
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000629 };
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000630 return DAG.getNode(AMDGPUISD::EXPORT, DL, Op.getValueType(), Args);
Tom Stellard75aadc22012-12-11 21:25:42 +0000631 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000632
Tom Stellard75aadc22012-12-11 21:25:42 +0000633 // default for switch(IntrinsicID)
634 default: break;
635 }
636 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
637 break;
638 }
639 case ISD::INTRINSIC_WO_CHAIN: {
640 unsigned IntrinsicID =
641 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
642 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000643 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000644 switch(IntrinsicID) {
645 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeunef143af32013-11-11 22:10:24 +0000646 case AMDGPUIntrinsic::R600_interp_xy:
647 case AMDGPUIntrinsic::R600_interp_zw: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000648 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Tom Stellard41afe6a2013-02-05 17:09:14 +0000649 MachineSDNode *interp;
Vincent Lejeunef143af32013-11-11 22:10:24 +0000650 SDValue RegisterINode = Op.getOperand(2);
651 SDValue RegisterJNode = Op.getOperand(3);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000652
Vincent Lejeunef143af32013-11-11 22:10:24 +0000653 if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
Tom Stellard41afe6a2013-02-05 17:09:14 +0000654 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000655 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000656 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000657 else
658 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000659 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000660 RegisterJNode, RegisterINode);
Vincent Lejeunef143af32013-11-11 22:10:24 +0000661 return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
662 SDValue(interp, 0), SDValue(interp, 1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000663 }
Matt Arsenault59bd3012016-01-22 19:00:09 +0000664 case AMDGPUIntrinsic::r600_tex:
665 case AMDGPUIntrinsic::r600_texc:
666 case AMDGPUIntrinsic::r600_txl:
667 case AMDGPUIntrinsic::r600_txlc:
668 case AMDGPUIntrinsic::r600_txb:
669 case AMDGPUIntrinsic::r600_txbc:
670 case AMDGPUIntrinsic::r600_txf:
671 case AMDGPUIntrinsic::r600_txq:
672 case AMDGPUIntrinsic::r600_ddx:
673 case AMDGPUIntrinsic::r600_ddy:
Vincent Lejeune6df39432013-10-02 16:00:33 +0000674 case AMDGPUIntrinsic::R600_ldptr: {
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000675 unsigned TextureOp;
676 switch (IntrinsicID) {
Matt Arsenault59bd3012016-01-22 19:00:09 +0000677 case AMDGPUIntrinsic::r600_tex:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000678 TextureOp = 0;
679 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000680 case AMDGPUIntrinsic::r600_texc:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000681 TextureOp = 1;
682 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000683 case AMDGPUIntrinsic::r600_txl:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000684 TextureOp = 2;
685 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000686 case AMDGPUIntrinsic::r600_txlc:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000687 TextureOp = 3;
688 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000689 case AMDGPUIntrinsic::r600_txb:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000690 TextureOp = 4;
691 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000692 case AMDGPUIntrinsic::r600_txbc:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000693 TextureOp = 5;
694 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000695 case AMDGPUIntrinsic::r600_txf:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000696 TextureOp = 6;
697 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000698 case AMDGPUIntrinsic::r600_txq:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000699 TextureOp = 7;
700 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000701 case AMDGPUIntrinsic::r600_ddx:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000702 TextureOp = 8;
703 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000704 case AMDGPUIntrinsic::r600_ddy:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000705 TextureOp = 9;
706 break;
Vincent Lejeune6df39432013-10-02 16:00:33 +0000707 case AMDGPUIntrinsic::R600_ldptr:
708 TextureOp = 10;
709 break;
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000710 default:
711 llvm_unreachable("Unknow Texture Operation");
712 }
713
714 SDValue TexArgs[19] = {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000715 DAG.getConstant(TextureOp, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000716 Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000717 DAG.getConstant(0, DL, MVT::i32),
718 DAG.getConstant(1, DL, MVT::i32),
719 DAG.getConstant(2, DL, MVT::i32),
720 DAG.getConstant(3, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000721 Op.getOperand(2),
722 Op.getOperand(3),
723 Op.getOperand(4),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000724 DAG.getConstant(0, DL, MVT::i32),
725 DAG.getConstant(1, DL, MVT::i32),
726 DAG.getConstant(2, DL, MVT::i32),
727 DAG.getConstant(3, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000728 Op.getOperand(5),
729 Op.getOperand(6),
730 Op.getOperand(7),
731 Op.getOperand(8),
732 Op.getOperand(9),
733 Op.getOperand(10)
734 };
Craig Topper48d114b2014-04-26 18:35:24 +0000735 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000736 }
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000737 case AMDGPUIntrinsic::AMDGPU_dp4: {
738 SDValue Args[8] = {
739 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000740 DAG.getConstant(0, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000741 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000742 DAG.getConstant(0, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000743 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000744 DAG.getConstant(1, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000745 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000746 DAG.getConstant(1, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000747 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000748 DAG.getConstant(2, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000749 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000750 DAG.getConstant(2, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000751 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000752 DAG.getConstant(3, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000753 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000754 DAG.getConstant(3, DL, MVT::i32))
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000755 };
Craig Topper48d114b2014-04-26 18:35:24 +0000756 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000757 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000758
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000759 case Intrinsic::r600_read_ngroups_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000760 return LowerImplicitParameter(DAG, VT, DL, 0);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000761 case Intrinsic::r600_read_ngroups_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000762 return LowerImplicitParameter(DAG, VT, DL, 1);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000763 case Intrinsic::r600_read_ngroups_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000764 return LowerImplicitParameter(DAG, VT, DL, 2);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000765 case Intrinsic::r600_read_global_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000766 return LowerImplicitParameter(DAG, VT, DL, 3);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000767 case Intrinsic::r600_read_global_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000768 return LowerImplicitParameter(DAG, VT, DL, 4);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000769 case Intrinsic::r600_read_global_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000770 return LowerImplicitParameter(DAG, VT, DL, 5);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000771 case Intrinsic::r600_read_local_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000772 return LowerImplicitParameter(DAG, VT, DL, 6);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000773 case Intrinsic::r600_read_local_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000774 return LowerImplicitParameter(DAG, VT, DL, 7);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000775 case Intrinsic::r600_read_local_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000776 return LowerImplicitParameter(DAG, VT, DL, 8);
777
Matt Arsenaultbef34e22016-01-22 21:30:34 +0000778 case Intrinsic::r600_read_workdim:
779 case AMDGPUIntrinsic::AMDGPU_read_workdim: { // Legacy name.
Tom Stellarddcb9f092015-07-09 21:20:37 +0000780 uint32_t ByteOffset = getImplicitParameterOffset(MFI, GRID_DIM);
781 return LowerImplicitParameter(DAG, VT, DL, ByteOffset / 4);
782 }
Jan Veselye5121f32014-10-14 20:05:26 +0000783
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000784 case Intrinsic::r600_read_tgid_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000785 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
786 AMDGPU::T1_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000787 case Intrinsic::r600_read_tgid_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000788 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
789 AMDGPU::T1_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000790 case Intrinsic::r600_read_tgid_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000791 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
792 AMDGPU::T1_Z, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000793 case Intrinsic::r600_read_tidig_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000794 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
795 AMDGPU::T0_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000796 case Intrinsic::r600_read_tidig_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000797 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
798 AMDGPU::T0_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000799 case Intrinsic::r600_read_tidig_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000800 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
801 AMDGPU::T0_Z, VT);
Matt Arsenaultbef34e22016-01-22 21:30:34 +0000802
803 // FIXME: Should be renamed to r600 prefix
Matt Arsenault0c3e2332016-01-26 04:14:16 +0000804 case AMDGPUIntrinsic::AMDGPU_rsq_clamped:
Matt Arsenaultbef34e22016-01-22 21:30:34 +0000805 return DAG.getNode(AMDGPUISD::RSQ_CLAMPED, DL, VT, Op.getOperand(1));
806
807 case Intrinsic::r600_rsq:
Matt Arsenault0c3e2332016-01-26 04:14:16 +0000808 case AMDGPUIntrinsic::AMDGPU_rsq: // Legacy name
Matt Arsenault257d48d2014-06-24 22:13:39 +0000809 // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
810 return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000811 }
812 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
813 break;
814 }
815 } // end switch(Op.getOpcode())
816 return SDValue();
817}
818
/// Replace the results of a node whose result type is illegal with legal
/// equivalents. Handles the conversion and division nodes R600 lowers
/// itself and defers everything else to AMDGPUTargetLowering.
void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
    return;
  case ISD::FP_TO_UINT:
    // An i1 result only needs a compare against 0.0 (see LowerFPTOUINT).
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
      return;
    }
    // Fall-through. Since we don't care about out of bounds values
    // we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint
    // considers some extra cases which are not necessary here.
  case ISD::FP_TO_SINT: {
    SDValue Result;
    if (expandFP_TO_SINT(N, Result, DAG))
      Results.push_back(Result);
    return;
  }
  case ISD::SDIVREM: {
    // LowerSDIVREM produces a two-result node; push both results.
    SDValue Op = SDValue(N, 1);
    SDValue RES = LowerSDIVREM(Op, DAG);
    Results.push_back(RES);
    Results.push_back(RES.getValue(1));
    break;
  }
  case ISD::UDIVREM: {
    // LowerUDIVREM64 appends the lowered results to Results itself.
    SDValue Op = SDValue(N, 0);
    LowerUDIVREM64(Op, DAG, Results);
    break;
  }
  }
}
854
Tom Stellard880a80a2014-06-17 16:53:14 +0000855SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
856 SDValue Vector) const {
857
858 SDLoc DL(Vector);
859 EVT VecVT = Vector.getValueType();
860 EVT EltVT = VecVT.getVectorElementType();
861 SmallVector<SDValue, 8> Args;
862
863 for (unsigned i = 0, e = VecVT.getVectorNumElements();
864 i != e; ++i) {
Mehdi Amini44ede332015-07-09 02:09:04 +0000865 Args.push_back(DAG.getNode(
866 ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
867 DAG.getConstant(i, DL, getVectorIdxTy(DAG.getDataLayout()))));
Tom Stellard880a80a2014-06-17 16:53:14 +0000868 }
869
870 return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
871}
872
873SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
874 SelectionDAG &DAG) const {
875
876 SDLoc DL(Op);
877 SDValue Vector = Op.getOperand(0);
878 SDValue Index = Op.getOperand(1);
879
880 if (isa<ConstantSDNode>(Index) ||
881 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
882 return Op;
883
884 Vector = vectorToVerticalVector(DAG, Vector);
885 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
886 Vector, Index);
887}
888
889SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
890 SelectionDAG &DAG) const {
891 SDLoc DL(Op);
892 SDValue Vector = Op.getOperand(0);
893 SDValue Value = Op.getOperand(1);
894 SDValue Index = Op.getOperand(2);
895
896 if (isa<ConstantSDNode>(Index) ||
897 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
898 return Op;
899
900 Vector = vectorToVerticalVector(DAG, Vector);
901 SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
902 Vector, Value, Index);
903 return vectorToVerticalVector(DAG, Insert);
904}
905
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000906SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
907 // On hw >= R700, COS/SIN input must be between -1. and 1.
908 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
909 EVT VT = Op.getValueType();
910 SDValue Arg = Op.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000911 SDLoc DL(Op);
Sanjay Patela2607012015-09-16 16:31:21 +0000912
913 // TODO: Should this propagate fast-math-flags?
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000914 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
915 DAG.getNode(ISD::FADD, DL, VT,
916 DAG.getNode(ISD::FMUL, DL, VT, Arg,
917 DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
918 DAG.getConstantFP(0.5, DL, MVT::f32)));
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000919 unsigned TrigNode;
920 switch (Op.getOpcode()) {
921 case ISD::FCOS:
922 TrigNode = AMDGPUISD::COS_HW;
923 break;
924 case ISD::FSIN:
925 TrigNode = AMDGPUISD::SIN_HW;
926 break;
927 default:
928 llvm_unreachable("Wrong trig opcode");
929 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000930 SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
931 DAG.getNode(ISD::FADD, DL, VT, FractPart,
932 DAG.getConstantFP(-0.5, DL, MVT::f32)));
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000933 if (Gen >= AMDGPUSubtarget::R700)
934 return TrigVal;
935 // On R600 hw, COS/SIN input must be between -Pi and Pi.
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000936 return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
937 DAG.getConstantFP(3.14159265359, DL, MVT::f32));
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000938}
939
Jan Vesely25f36272014-06-18 12:27:13 +0000940SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
941 SDLoc DL(Op);
942 EVT VT = Op.getValueType();
943
944 SDValue Lo = Op.getOperand(0);
945 SDValue Hi = Op.getOperand(1);
946 SDValue Shift = Op.getOperand(2);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000947 SDValue Zero = DAG.getConstant(0, DL, VT);
948 SDValue One = DAG.getConstant(1, DL, VT);
Jan Vesely25f36272014-06-18 12:27:13 +0000949
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000950 SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
951 SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
Jan Vesely25f36272014-06-18 12:27:13 +0000952 SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
953 SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
954
955 // The dance around Width1 is necessary for 0 special case.
956 // Without it the CompShift might be 32, producing incorrect results in
957 // Overflow. So we do the shift in two steps, the alternative is to
958 // add a conditional to filter the special case.
959
960 SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
961 Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);
962
963 SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
964 HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
965 SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);
966
967 SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
968 SDValue LoBig = Zero;
969
970 Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
971 Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
972
973 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
974}
975
Jan Vesely900ff2e2014-06-18 12:27:15 +0000976SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
977 SDLoc DL(Op);
978 EVT VT = Op.getValueType();
979
980 SDValue Lo = Op.getOperand(0);
981 SDValue Hi = Op.getOperand(1);
982 SDValue Shift = Op.getOperand(2);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000983 SDValue Zero = DAG.getConstant(0, DL, VT);
984 SDValue One = DAG.getConstant(1, DL, VT);
Jan Vesely900ff2e2014-06-18 12:27:15 +0000985
Jan Veselyecf51332014-06-18 12:27:17 +0000986 const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;
987
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000988 SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
989 SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
Jan Vesely900ff2e2014-06-18 12:27:15 +0000990 SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
991 SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
992
993 // The dance around Width1 is necessary for 0 special case.
994 // Without it the CompShift might be 32, producing incorrect results in
995 // Overflow. So we do the shift in two steps, the alternative is to
996 // add a conditional to filter the special case.
997
998 SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
999 Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);
1000
Jan Veselyecf51332014-06-18 12:27:17 +00001001 SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
Jan Vesely900ff2e2014-06-18 12:27:15 +00001002 SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
1003 LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);
1004
Jan Veselyecf51332014-06-18 12:27:17 +00001005 SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
1006 SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;
Jan Vesely900ff2e2014-06-18 12:27:15 +00001007
1008 Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
1009 Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
1010
1011 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
1012}
1013
Jan Vesely808fff52015-04-30 17:15:56 +00001014SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
1015 unsigned mainop, unsigned ovf) const {
1016 SDLoc DL(Op);
1017 EVT VT = Op.getValueType();
1018
1019 SDValue Lo = Op.getOperand(0);
1020 SDValue Hi = Op.getOperand(1);
1021
1022 SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
1023 // Extend sign.
1024 OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
1025 DAG.getValueType(MVT::i1));
1026
1027 SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);
1028
1029 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
1030}
1031
Tom Stellard75aadc22012-12-11 21:25:42 +00001032SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001033 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001034 return DAG.getNode(
1035 ISD::SETCC,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001036 DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001037 MVT::i1,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001038 Op, DAG.getConstantFP(0.0f, DL, MVT::f32),
Tom Stellard75aadc22012-12-11 21:25:42 +00001039 DAG.getCondCode(ISD::SETNE)
1040 );
1041}
1042
Tom Stellard75aadc22012-12-11 21:25:42 +00001043SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001044 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001045 unsigned DwordOffset) const {
1046 unsigned ByteOffset = DwordOffset * 4;
1047 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +00001048 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +00001049
1050 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
1051 assert(isInt<16>(ByteOffset));
1052
1053 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001054 DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
Tom Stellard75aadc22012-12-11 21:25:42 +00001055 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
1056 false, false, false, 0);
1057}
1058
Tom Stellard75aadc22012-12-11 21:25:42 +00001059bool R600TargetLowering::isZero(SDValue Op) const {
1060 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
1061 return Cst->isNullValue();
1062 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
1063 return CstFP->isZero();
1064 } else {
1065 return false;
1066 }
1067}
1068
/// Custom lowering for SELECT_CC.
///
/// Canonicalizes the operands so the node can be matched by one of the
/// native forms (the SET* patterns, which need hardware true/false values
/// in the true/false slots, or the CND* patterns, which need a zero on the
/// RHS of the compare). If neither form can match, the node is expanded
/// into two supported SELECT_CC operations.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // For f32 selects, first try to fold the whole node into a legacy
  // min/max operation.
  if (VT == MVT::f32) {
    DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
    SDValue MinMax = CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
    if (MinMax)
      return MinMax;
  }

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  // If False/True are swapped relative to what SET* wants, invert the
  // condition (or invert and swap the compare operands) so the legal form
  // appears.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
     ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        // Inverting the condition also swaps which value is selected.
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    // The not-equal family is expressed as the inverted equality compare
    // with the select arms exchanged.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, DL, CompareVT);
    HWFalse = DAG.getConstant(0, DL, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
1210
Alp Tokercb402912014-01-24 17:20:08 +00001211/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001212/// convert these pointers to a register index. Each register holds
1213/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
1214/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
1215/// for indirect addressing.
1216SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1217 unsigned StackWidth,
1218 SelectionDAG &DAG) const {
1219 unsigned SRLPad;
1220 switch(StackWidth) {
1221 case 1:
1222 SRLPad = 2;
1223 break;
1224 case 2:
1225 SRLPad = 3;
1226 break;
1227 case 4:
1228 SRLPad = 4;
1229 break;
1230 default: llvm_unreachable("Invalid stack width");
1231 }
1232
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001233 SDLoc DL(Ptr);
1234 return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
1235 DAG.getConstant(SRLPad, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001236}
1237
1238void R600TargetLowering::getStackAddress(unsigned StackWidth,
1239 unsigned ElemIdx,
1240 unsigned &Channel,
1241 unsigned &PtrIncr) const {
1242 switch (StackWidth) {
1243 default:
1244 case 1:
1245 Channel = 0;
1246 if (ElemIdx > 0) {
1247 PtrIncr = 1;
1248 } else {
1249 PtrIncr = 0;
1250 }
1251 break;
1252 case 2:
1253 Channel = ElemIdx % 2;
1254 if (ElemIdx == 2) {
1255 PtrIncr = 1;
1256 } else {
1257 PtrIncr = 0;
1258 }
1259 break;
1260 case 4:
1261 Channel = ElemIdx;
1262 PtrIncr = 0;
1263 break;
1264 }
1265}
1266
/// Custom lowering for STORE nodes.
///
/// Handles the cases the generic legalizer cannot:
///  * global truncating i8/i16 stores, emitted as an AMDGPUISD::STORE_MSKOR
///    built from a shifted value/mask pair aligned within the containing
///    dword;
///  * global stores of at least dword width, whose byte pointer is rewritten
///    to a dword address;
///  * private (stack) stores, scalar or vector, emitted as REGISTER_STORE
///    nodes using indirect register addressing.
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Ptr = Op.getOperand(2);

  // Give the generic AMDGPU lowering the first shot.
  SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
  if (Result.getNode()) {
    return Result;
  }

  if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
    if (StoreNode->isTruncatingStore()) {
      EVT VT = Value.getValueType();
      assert(VT.bitsLE(MVT::i32));
      EVT MemVT = StoreNode->getMemoryVT();
      // Mask selecting the stored byte/halfword within its dword.
      SDValue MaskConstant;
      if (MemVT == MVT::i8) {
        MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
      } else {
        assert(MemVT == MVT::i16);
        MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
      }
      // Split the byte pointer into the dword address and the byte offset
      // within that dword; the offset (in bits, hence << 3) positions both
      // the value and the mask.
      SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
                                      DAG.getConstant(2, DL, MVT::i32));
      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
                                      DAG.getConstant(0x00000003, DL, VT));
      SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
      SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
                                  DAG.getConstant(3, DL, VT));
      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
      // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
      // vector instead.
      SDValue Src[4] = {
        ShiftedValue,
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(0, DL, MVT::i32),
        Mask
      };
      SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src);
      SDValue Args[3] = { Chain, Input, DWordAddr };
      return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
                                     Op->getVTList(), Args, MemVT,
                                     StoreNode->getMemOperand());
    } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
               Value.getValueType().bitsGE(MVT::i32)) {
      // Convert pointer from byte address to dword address.
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
                        DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
                                    Ptr, DAG.getConstant(2, DL, MVT::i32)));

      if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
        llvm_unreachable("Truncated and indexed stores not supported yet");
      } else {
        Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
      }
      return Chain;
    }
  }

  EVT ValueVT = Value.getValueType();

  // Everything below handles private (stack) stores only.
  if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // NOTE(review): AMDGPUTargetLowering::LowerSTORE was already tried at the
  // top of this function; this second call looks redundant — confirm before
  // removing.
  SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
  if (Ret.getNode()) {
    return Ret;
  }
  // Lowering for indirect addressing

  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL =
      static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  // Turn the byte pointer into a register index (see stackPtrToRegIndex).
  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (ValueVT.isVector()) {
    unsigned NumElemVT = ValueVT.getVectorNumElements();
    EVT ElemVT = ValueVT.getVectorElementType();
    SmallVector<SDValue, 4> Stores(NumElemVT);

    // (Message says "load" but this is the store path; kept as-is.)
    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    // Emit one REGISTER_STORE per element; Ptr is advanced cumulatively by
    // the per-element increment from getStackAddress.
    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, DL, MVT::i32));
      SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
                                 Value, DAG.getConstant(i, DL, MVT::i32));

      Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
                              Chain, Elem, Ptr,
                              DAG.getTargetConstant(Channel, DL, MVT::i32));
    }
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
  } else {
    if (ValueVT == MVT::i8) {
      Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
    }
    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
                        DAG.getTargetConstant(0, DL, MVT::i32)); // Channel
  }

  return Chain;
}
1379
Tom Stellard365366f2013-01-23 02:09:06 +00001380// return (512 + (kc_bank << 12)
1381static int
1382ConstantAddressBlock(unsigned AddressSpace) {
1383 switch (AddressSpace) {
1384 case AMDGPUAS::CONSTANT_BUFFER_0:
1385 return 512;
1386 case AMDGPUAS::CONSTANT_BUFFER_1:
1387 return 512 + 4096;
1388 case AMDGPUAS::CONSTANT_BUFFER_2:
1389 return 512 + 4096 * 2;
1390 case AMDGPUAS::CONSTANT_BUFFER_3:
1391 return 512 + 4096 * 3;
1392 case AMDGPUAS::CONSTANT_BUFFER_4:
1393 return 512 + 4096 * 4;
1394 case AMDGPUAS::CONSTANT_BUFFER_5:
1395 return 512 + 4096 * 5;
1396 case AMDGPUAS::CONSTANT_BUFFER_6:
1397 return 512 + 4096 * 6;
1398 case AMDGPUAS::CONSTANT_BUFFER_7:
1399 return 512 + 4096 * 7;
1400 case AMDGPUAS::CONSTANT_BUFFER_8:
1401 return 512 + 4096 * 8;
1402 case AMDGPUAS::CONSTANT_BUFFER_9:
1403 return 512 + 4096 * 9;
1404 case AMDGPUAS::CONSTANT_BUFFER_10:
1405 return 512 + 4096 * 10;
1406 case AMDGPUAS::CONSTANT_BUFFER_11:
1407 return 512 + 4096 * 11;
1408 case AMDGPUAS::CONSTANT_BUFFER_12:
1409 return 512 + 4096 * 12;
1410 case AMDGPUAS::CONSTANT_BUFFER_13:
1411 return 512 + 4096 * 13;
1412 case AMDGPUAS::CONSTANT_BUFFER_14:
1413 return 512 + 4096 * 14;
1414 case AMDGPUAS::CONSTANT_BUFFER_15:
1415 return 512 + 4096 * 15;
1416 default:
1417 return -1;
1418 }
1419}
1420
/// Custom lowering for ISD::LOAD on R600.
///
/// Tries, in order:
///  1. The generic AMDGPU lowering.
///  2. Constant-address-space loads of global variables, rewritten to
///     AMDGPUISD::REGISTER_LOAD with the byte address shifted down to a
///     dword index.
///  3. Vector loads from local memory, which are scalarized.
///  4. Non-extending / zero-extending loads from a CONSTANT_BUFFER_* space,
///     turned into AMDGPUISD::CONST_ADDRESS reads (the encoded position is
///     divided by 4 again at the ISel step — see the in-loop comment).
///  5. Sign-extending loads, manually expanded to EXTLOAD +
///     SIGN_EXTEND_INREG (see the long comment below for why).
///  6. Private (scratch) loads, lowered to indirect REGISTER_LOAD
///     addressing, one channel per vector element.
/// Returns SDValue() for anything else so the legalizer handles it.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  if (SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG))
    return Ret;

  // Lower loads constant address space global variable loads
  if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
      isa<GlobalVariable>(GetUnderlyingObject(
          LoadNode->getMemOperand()->getValue(), DAG.getDataLayout()))) {

    // NOTE: this inner Ptr deliberately shadows the outer one; it is the
    // base pointer normalized to the private-address pointer width.
    SDValue Ptr = DAG.getZExtOrTrunc(
        LoadNode->getBasePtr(), DL,
        getPointerTy(DAG.getDataLayout(), AMDGPUAS::PRIVATE_ADDRESS));
    // Convert the byte address into a 32-bit dword index (divide by 4).
    Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
                      DAG.getConstant(2, DL, MVT::i32));
    return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op->getVTList(),
                       LoadNode->getChain(), Ptr,
                       DAG.getTargetConstant(0, DL, MVT::i32),
                       Op.getOperand(2));
  }

  // Vector loads from local memory are split into per-element loads.
  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      ScalarizeVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
        isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      // A scalar load still builds a v4i32 here; the scalar result is
      // extracted from element 0 below.
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT,
                           makeArrayRef(Slots, NumElements));
    } else {
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
                      DAG.getConstant(4, DL, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
          );
    }

    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, DL, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                     LoadNode->getPointerInfo(), MemVT,
                                     LoadNode->isVolatile(),
                                     LoadNode->isNonTemporal(),
                                     LoadNode->isInvariant(),
                                     LoadNode->getAlignment());
    SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
                              DAG.getValueType(MemVT));

    SDValue MergedValues[2] = { Res, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Everything below handles private (scratch) memory only.
  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL =
      static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    // Emit one REGISTER_LOAD per element, advancing the register index as
    // dictated by the stack layout (getStackAddress).
    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, DL, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, DL, MVT::i32),
                             Op.getOperand(2));
    }
    // Pad the result out to a full 4-element vector with undefs.
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, DL, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2] = {
    LoweredLoad,
    Chain
  };

  return DAG.getMergeValues(Ops, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001578
Matt Arsenault1d555c42014-06-23 18:00:55 +00001579SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1580 SDValue Chain = Op.getOperand(0);
1581 SDValue Cond = Op.getOperand(1);
1582 SDValue Jump = Op.getOperand(2);
1583
1584 return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1585 Chain, Jump, Cond);
1586}
1587
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
///
/// For compute shaders every formal argument is materialized as a load from
/// CONSTANT_BUFFER_0 at a fixed offset (the first 36 bytes of that buffer
/// hold dispatch information, so argument data starts at offset 36). For
/// all other shader types arguments arrive in R600_Reg128 live-in registers.
SDValue R600TargetLowering::LowerFormalArguments(
    SDValue Chain,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    SDLoc DL, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();

  // Assign locations based on the original (pre-legalization) IR argument
  // types rather than the legalized Ins list.
  SmallVector<ISD::InputArg, 8> LocalIns;

  getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);

  AnalyzeFormalArguments(CCInfo, LocalIns);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    const ISD::InputArg &In = Ins[i];
    EVT VT = In.VT;           // Register type the argument is legalized to.
    EVT MemVT = VA.getLocVT();  // Type the argument occupies in memory.
    if (!VT.isVector() && MemVT.isVector()) {
      // Get load source type if scalarized.
      MemVT = MemVT.getVectorElementType();
    }

    // Non-compute shaders receive their inputs in registers, not memory.
    if (MFI->getShaderType() != ShaderType::COMPUTE) {
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
    // being invalid. Somehow this seems to work with i64 arguments, but breaks
    // for <1 x i64>.

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.
    ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
    if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
      // FIXME: This should really check the extload type, but the handling of
      // extload vector parameters seems to be broken.

      // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
      Ext = ISD::SEXTLOAD;
    }

    // Compute the offset from the value.
    // XXX - I think PartOffset should give you this, but it seems to give the
    // size of the register which isn't useful.

    unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
    unsigned PartOffset = VA.getLocMemOffset();
    unsigned Offset = 36 + VA.getLocMemOffset();

    // PtrInfo offset is relative to the start of this IR-level argument,
    // so parts of a split argument share one underlying value.
    MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
    SDValue Arg = DAG.getLoad(ISD::UNINDEXED, Ext, VT, DL, Chain,
                              DAG.getConstant(Offset, DL, MVT::i32),
                              DAG.getUNDEF(MVT::i32),
                              PtrInfo,
                              MemVT, false, true, true, 4);

    // 4 is the preferred alignment for the CONSTANT memory space.
    InVals.push_back(Arg);
    // Track where the next argument would start so runtime metadata
    // (ABIArgOffset) reflects the total argument size.
    MFI->ABIArgOffset = Offset + MemVT.getStoreSize();
  }
  return Chain;
}
1667
Mehdi Amini44ede332015-07-09 02:09:04 +00001668EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1669 EVT VT) const {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001670 if (!VT.isVector())
1671 return MVT::i32;
Tom Stellard75aadc22012-12-11 21:25:42 +00001672 return VT.changeVectorElementTypeToInteger();
1673}
1674
Matt Arsenault209a7b92014-04-18 07:40:20 +00001675static SDValue CompactSwizzlableVector(
1676 SelectionDAG &DAG, SDValue VectorEntry,
1677 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001678 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1679 assert(RemapSwizzle.empty());
1680 SDValue NewBldVec[4] = {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001681 VectorEntry.getOperand(0),
1682 VectorEntry.getOperand(1),
1683 VectorEntry.getOperand(2),
1684 VectorEntry.getOperand(3)
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001685 };
1686
1687 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeunefa58a5f2013-10-13 17:56:10 +00001688 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1689 // We mask write here to teach later passes that the ith element of this
1690 // vector is undef. Thus we can use it to reduce 128 bits reg usage,
1691 // break false dependencies and additionnaly make assembly easier to read.
1692 RemapSwizzle[i] = 7; // SEL_MASK_WRITE
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001693 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1694 if (C->isZero()) {
1695 RemapSwizzle[i] = 4; // SEL_0
1696 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1697 } else if (C->isExactlyValue(1.0)) {
1698 RemapSwizzle[i] = 5; // SEL_1
1699 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1700 }
1701 }
1702
1703 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1704 continue;
1705 for (unsigned j = 0; j < i; j++) {
1706 if (NewBldVec[i] == NewBldVec[j]) {
1707 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1708 RemapSwizzle[i] = j;
1709 break;
1710 }
1711 }
1712 }
1713
1714 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001715 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001716}
1717
Benjamin Kramer193960c2013-06-11 13:32:25 +00001718static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1719 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001720 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1721 assert(RemapSwizzle.empty());
1722 SDValue NewBldVec[4] = {
1723 VectorEntry.getOperand(0),
1724 VectorEntry.getOperand(1),
1725 VectorEntry.getOperand(2),
1726 VectorEntry.getOperand(3)
1727 };
1728 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001729 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001730 RemapSwizzle[i] = i;
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001731 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1732 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1733 ->getZExtValue();
1734 if (i == Idx)
1735 isUnmovable[Idx] = true;
1736 }
1737 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001738
1739 for (unsigned i = 0; i < 4; i++) {
1740 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1741 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1742 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001743 if (isUnmovable[Idx])
1744 continue;
1745 // Swap i and Idx
1746 std::swap(NewBldVec[Idx], NewBldVec[i]);
1747 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1748 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001749 }
1750 }
1751
1752 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001753 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001754}
1755
1756
1757SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001758 SDValue Swz[4], SelectionDAG &DAG,
1759 SDLoc DL) const {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001760 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1761 // Old -> New swizzle values
1762 DenseMap<unsigned, unsigned> SwizzleRemap;
1763
1764 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1765 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001766 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001767 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001768 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001769 }
1770
1771 SwizzleRemap.clear();
1772 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1773 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001774 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001775 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001776 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001777 }
1778
1779 return BuildVector;
1780}
1781
1782
//===----------------------------------------------------------------------===//
// Custom DAG Optimizations
//===----------------------------------------------------------------------===//

/// R600-specific DAG combines. Handles a handful of node kinds itself and
/// defers everything else (including any case that falls out of the switch)
/// to AMDGPUTargetLowering::PerformDAGCombine.
SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
  case ISD::FP_ROUND: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
      return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
                         Arg.getOperand(0));
    }
    break;
  }

  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
  // (i32 select_cc f32, f32, -1, 0 cc)
  //
  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
  // this to one of the SET*_DX10 instructions.
  case ISD::FP_TO_SINT: {
    SDValue FNeg = N->getOperand(0);
    if (FNeg.getOpcode() != ISD::FNEG) {
      return SDValue();
    }
    SDValue SelectCC = FNeg.getOperand(0);
    if (SelectCC.getOpcode() != ISD::SELECT_CC ||
        SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
        SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
        !isHWTrueValue(SelectCC.getOperand(2)) ||
        !isHWFalseValue(SelectCC.getOperand(3))) {
      return SDValue();
    }

    SDLoc dl(N);
    return DAG.getNode(ISD::SELECT_CC, dl, N->getValueType(0),
                       SelectCC.getOperand(0), // LHS
                       SelectCC.getOperand(1), // RHS
                       DAG.getConstant(-1, dl, MVT::i32), // True
                       DAG.getConstant(0, dl, MVT::i32), // False
                       SelectCC.getOperand(4)); // CC

    break; // Unreachable: the return above always fires.
  }

  // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
  // => build_vector elt0, ... , NewEltIdx, ... , eltN
  case ISD::INSERT_VECTOR_ELT: {
    SDValue InVec = N->getOperand(0);
    SDValue InVal = N->getOperand(1);
    SDValue EltNo = N->getOperand(2);
    SDLoc dl(N);

    // If the inserted element is an UNDEF, just use the input vector.
    if (InVal.getOpcode() == ISD::UNDEF)
      return InVec;

    EVT VT = InVec.getValueType();

    // If we can't generate a legal BUILD_VECTOR, exit
    if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
      return SDValue();

    // Check that we know which element is being inserted
    if (!isa<ConstantSDNode>(EltNo))
      return SDValue();
    unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

    // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
    // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
    // vector elements.
    SmallVector<SDValue, 8> Ops;
    if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
      Ops.append(InVec.getNode()->op_begin(),
                 InVec.getNode()->op_end());
    } else if (InVec.getOpcode() == ISD::UNDEF) {
      unsigned NElts = VT.getVectorNumElements();
      Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
    } else {
      return SDValue();
    }

    // Insert the element
    if (Elt < Ops.size()) {
      // All the operands of BUILD_VECTOR must have the same type;
      // we enforce that here.
      EVT OpVT = Ops[0].getValueType();
      if (InVal.getValueType() != OpVT)
        InVal = OpVT.bitsGT(InVal.getValueType()) ?
          DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
          DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
      Ops[Elt] = InVal;
    }

    // Return the new vector
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
  }

  // Extract_vec (Build_vector) generated by custom lowering
  // also needs to be customly combined
  case ISD::EXTRACT_VECTOR_ELT: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return Arg->getOperand(Element);
      }
    }
    // Also look through a bitcast of a BUILD_VECTOR: extract the element
    // first, then bitcast it to the requested type.
    if (Arg.getOpcode() == ISD::BITCAST &&
        Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
                           Arg->getOperand(0).getOperand(Element));
      }
    }
    break;
  }

  case ISD::SELECT_CC: {
    // Try common optimizations
    SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
    if (Ret.getNode())
      return Ret;

    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
    // selectcc x, y, a, b, inv(cc)
    //
    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
    // selectcc x, y, a, b, cc
    SDValue LHS = N->getOperand(0);
    if (LHS.getOpcode() != ISD::SELECT_CC) {
      return SDValue();
    }

    SDValue RHS = N->getOperand(1);
    SDValue True = N->getOperand(2);
    SDValue False = N->getOperand(3);
    ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();

    // The patterns above only apply when the outer select reuses the inner
    // select's true/false values in this exact arrangement.
    if (LHS.getOperand(2).getNode() != True.getNode() ||
        LHS.getOperand(3).getNode() != False.getNode() ||
        RHS.getNode() != False.getNode()) {
      return SDValue();
    }

    switch (NCC) {
    default: return SDValue();
    case ISD::SETNE: return LHS;
    case ISD::SETEQ: {
      ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
      LHSCC = ISD::getSetCCInverse(LHSCC,
                                   LHS.getOperand(0).getValueType().isInteger());
      // Only fold to the inverted condition if it is (or will be) legal.
      if (DCI.isBeforeLegalizeOps() ||
          isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
        return DAG.getSelectCC(SDLoc(N),
                               LHS.getOperand(0),
                               LHS.getOperand(1),
                               LHS.getOperand(2),
                               LHS.getOperand(3),
                               LHSCC);
      break;
    }
    }
    return SDValue();
  }

  // Rewrite the export's swizzle selectors (operands 4-7) via
  // OptimizeSwizzle, which may also compact the source BUILD_VECTOR.
  case AMDGPUISD::EXPORT: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    SDValue NewArgs[8] = {
      N->getOperand(0), // Chain
      SDValue(),
      N->getOperand(2), // ArrayBase
      N->getOperand(3), // Type
      N->getOperand(4), // SWZ_X
      N->getOperand(5), // SWZ_Y
      N->getOperand(6), // SWZ_Z
      N->getOperand(7) // SWZ_W
    };
    SDLoc DL(N);
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
    return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
  }
  // Same idea for texture fetches: the swizzle selectors start at operand 2.
  case AMDGPUISD::TEXTURE_FETCH: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    SDValue NewArgs[19] = {
      N->getOperand(0),
      N->getOperand(1),
      N->getOperand(2),
      N->getOperand(3),
      N->getOperand(4),
      N->getOperand(5),
      N->getOperand(6),
      N->getOperand(7),
      N->getOperand(8),
      N->getOperand(9),
      N->getOperand(10),
      N->getOperand(11),
      N->getOperand(12),
      N->getOperand(13),
      N->getOperand(14),
      N->getOperand(15),
      N->getOperand(16),
      N->getOperand(17),
      N->getOperand(18),
    };
    SDLoc DL(N);
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
    return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
  }
  }

  return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002008
2009static bool
2010FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002011 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Eric Christopherfc6de422014-08-05 02:39:49 +00002012 const R600InstrInfo *TII =
2013 static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002014 if (!Src.isMachineOpcode())
2015 return false;
2016 switch (Src.getMachineOpcode()) {
2017 case AMDGPU::FNEG_R600:
2018 if (!Neg.getNode())
2019 return false;
2020 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002021 Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002022 return true;
2023 case AMDGPU::FABS_R600:
2024 if (!Abs.getNode())
2025 return false;
2026 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002027 Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002028 return true;
2029 case AMDGPU::CONST_COPY: {
2030 unsigned Opcode = ParentNode->getMachineOpcode();
2031 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2032
2033 if (!Sel.getNode())
2034 return false;
2035
2036 SDValue CstOffset = Src.getOperand(0);
2037 if (ParentNode->getValueType(0).isVector())
2038 return false;
2039
2040 // Gather constants values
2041 int SrcIndices[] = {
2042 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2043 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2044 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
2045 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2046 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2047 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2048 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2049 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2050 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2051 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2052 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
2053 };
2054 std::vector<unsigned> Consts;
Matt Arsenault4d64f962014-05-12 19:23:21 +00002055 for (int OtherSrcIdx : SrcIndices) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002056 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2057 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2058 continue;
2059 if (HasDst) {
2060 OtherSrcIdx--;
2061 OtherSelIdx--;
2062 }
2063 if (RegisterSDNode *Reg =
2064 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2065 if (Reg->getReg() == AMDGPU::ALU_CONST) {
Matt Arsenaultb3ee3882014-05-12 19:26:38 +00002066 ConstantSDNode *Cst
2067 = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002068 Consts.push_back(Cst->getZExtValue());
2069 }
2070 }
2071 }
2072
Matt Arsenault37c12d72014-05-12 20:42:57 +00002073 ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002074 Consts.push_back(Cst->getZExtValue());
2075 if (!TII->fitsConstReadLimitations(Consts)) {
2076 return false;
2077 }
2078
2079 Sel = CstOffset;
2080 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
2081 return true;
2082 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002083 case AMDGPU::MOV_IMM_I32:
2084 case AMDGPU::MOV_IMM_F32: {
2085 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
2086 uint64_t ImmValue = 0;
2087
2088
2089 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
2090 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
2091 float FloatValue = FPC->getValueAPF().convertToFloat();
2092 if (FloatValue == 0.0) {
2093 ImmReg = AMDGPU::ZERO;
2094 } else if (FloatValue == 0.5) {
2095 ImmReg = AMDGPU::HALF;
2096 } else if (FloatValue == 1.0) {
2097 ImmReg = AMDGPU::ONE;
2098 } else {
2099 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2100 }
2101 } else {
2102 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
2103 uint64_t Value = C->getZExtValue();
2104 if (Value == 0) {
2105 ImmReg = AMDGPU::ZERO;
2106 } else if (Value == 1) {
2107 ImmReg = AMDGPU::ONE_INT;
2108 } else {
2109 ImmValue = Value;
2110 }
2111 }
2112
2113 // Check that we aren't already using an immediate.
2114 // XXX: It's possible for an instruction to have more than one
2115 // immediate operand, but this is not supported yet.
2116 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
2117 if (!Imm.getNode())
2118 return false;
2119 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
2120 assert(C);
2121 if (C->getZExtValue())
2122 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002123 Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002124 }
2125 Src = DAG.getRegister(ImmReg, MVT::i32);
2126 return true;
2127 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002128 default:
2129 return false;
2130 }
2131}
2132
2133
/// \brief Fold the instructions after selecting them
///
/// Post-instruction-selection peephole: for each source operand of \p Node,
/// try to fold FNEG/FABS wrappers into neg/abs modifier operands, CONST_COPY
/// nodes into constant selectors, and MOV_IMM_* nodes into inline-constant
/// registers or the literal operand (see FoldOperand above).  Returns either
/// a newly built machine node with the folded operand list, or \p Node
/// unchanged if nothing could be folded.
SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
                                            SelectionDAG &DAG) const {
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
  if (!Node->isMachineOpcode())
    return Node;
  unsigned Opcode = Node->getMachineOpcode();
  // Placeholder passed to FoldOperand for modifier slots the current
  // instruction does not have (its getNode() is null, disabling that fold).
  SDValue FakeOp;

  // Mutable copy of the operand list; FoldOperand rewrites entries in place.
  std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());

  if (Opcode == AMDGPU::DOT_4) {
    // DOT_4 has eight scalar sources (src0/src1 across X,Y,Z,W channels),
    // each with its own neg/abs modifier operand.
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
    };
    for (unsigned i = 0; i < 8; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      // TII indices are MachineInstr-based (include the dst operand);
      // the SDNode operand list does not, hence the "- 1" / SelIdx--.
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue &Abs = Ops[AbsIdx[i] - 1];
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      if (HasDst)
        SelIdx--;
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      // DOT_4 has no literal slot, so pass FakeOp for Imm.
      // Rebuild the node as soon as one operand folds; the caller
      // re-invokes this folding on the new node.
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::REG_SEQUENCE) {
    // REG_SEQUENCE operands alternate (value, subreg-index) starting at 1;
    // only the value operands can be folded, and they carry no modifiers.
    for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
      SDValue &Src = Ops[i];
      if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::CLAMP_R600) {
    // Fold a CLAMP pseudo into its source instruction's clamp modifier bit,
    // when the source supports instruction modifiers.
    SDValue Src = Node->getOperand(0);
    if (!Src.isMachineOpcode() ||
        !TII->hasInstrModifiers(Src.getMachineOpcode()))
      return Node;
    int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
        AMDGPU::OpName::clamp);
    if (ClampIdx < 0)
      return Node;
    SDLoc DL(Node);
    // NOTE: this inner Ops (operands of Src) intentionally shadows the
    // outer Ops (operands of Node) declared above.
    std::vector<SDValue> Ops(Src->op_begin(), Src->op_end());
    Ops[ClampIdx - 1] = DAG.getTargetConstant(1, DL, MVT::i32);
    return DAG.getMachineNode(Src.getMachineOpcode(), DL,
        Node->getVTList(), Ops);
  } else {
    // Generic ALU instruction: up to three sources, each with neg/abs
    // modifiers (src2 has no abs) and a shared literal operand.
    if (!TII->hasInstrModifiers(Opcode))
      return Node;
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
      -1
    };
    for (unsigned i = 0; i < 3; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      // Same MI-index vs SDNode-index adjustment as in the DOT_4 case.
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue FakeAbs;
      SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
      if (HasDst) {
        SelIdx--;
        ImmIdx--;
      }
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      SDValue &Imm = Ops[ImmIdx];
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  }

  return Node;
}