//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation ----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "AMDGPUFrameLowering.h"
#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Function.h"

using namespace llvm;

R600TargetLowering::R600TargetLowering(TargetMachine &TM,
                                       const AMDGPUSubtarget &STI)
    : AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) {
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
  addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);

  computeRegisterProperties(STI.getRegisterInfo());

  // Set condition code actions
  setCondCodeAction(ISD::SETO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::f32, Expand);

  setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::i32, Expand);

  setOperationAction(ISD::FCOS, MVT::f32, Custom);
  setOperationAction(ISD::FSIN, MVT::f32, Custom);

  setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
  setOperationAction(ISD::SETCC, MVT::v2i32, Expand);

  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
  setOperationAction(ISD::SELECT, MVT::v4i32, Expand);

  // ADD, SUB overflow.
  // TODO: turn these into Legal?
  if (Subtarget->hasCARRY())
    setOperationAction(ISD::UADDO, MVT::i32, Custom);

  if (Subtarget->hasBORROW())
    setOperationAction(ISD::USUBO, MVT::i32, Custom);

  // Expand sign extension of vectors
  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);


  // Legalize loads and stores to the private address space.
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);

  // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
  // spaces, so it is custom lowered to handle those where it isn't.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
  }

  setOperationAction(ISD::STORE, MVT::i8, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);
  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
  setTruncStoreAction(MVT::i32, MVT::i16, Custom);

  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);

  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);

  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::FP_TO_SINT);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::SELECT_CC);
  setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);

  // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
  // to be Legal/Custom in order to avoid library calls.
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);

  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
  for (MVT VT : ScalarIntVTs) {
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::SUBE, VT, Expand);
  }

  setSchedulingPreference(Sched::Source);
}

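// Returns true when the instruction following I is the RETURN pseudo, i.e. I
// is the last instruction of the program; callers below use this to decide
// whether to set the "end of program" (EOP) bit on exports and RAT stores.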
static inline bool isEOP(MachineBasicBlock::iterator I) {
  return std::next(I)->getOpcode() == AMDGPU::RETURN;
}

MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(Subtarget->getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // Replace LDS_*_RET instructions that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI->getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
      // LDS_1A2D support and remove this special case.
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()) ||
          MI->getOpcode() == AMDGPU::LDS_CMPST_RET)
        return BB;

      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
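  // CLAMP, FABS and FNEG are all emitted as a plain MOV of the source
  // register with the matching instruction-modifier flag (clamp/abs/neg) set
  // on the new instruction.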
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
                       MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(isEOP(I)); // Set End of program bit
    break;
  }
  case AMDGPU::RAT_STORE_TYPED_eg: {
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addOperand(MI->getOperand(2))
        .addImm(isEOP(I)); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
        .addOperand(MI->getOperand(3))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
        .addOperand(MI->getOperand(2))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW)
        .addReg(T0, RegState::Implicit)
        .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
        .addOperand(MI->getOperand(3))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
        .addOperand(MI->getOperand(2))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW)
        .addReg(T0, RegState::Implicit)
        .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
        .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .addOperand(MI->getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO_INT)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .addOperand(MI->getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if it's not the last one of its type.
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = isEOP(I);
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz) ? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addOperand(MI->getOperand(2))
        .addOperand(MI->getOperand(3))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5))
        .addOperand(MI->getOperand(6))
        .addImm(CfInst)
        .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  MI->eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
  case ISD::SRA_PARTS:
  case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
  case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
  case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
  case ISD::FCOS:
  case ISD::FSIN: return LowerTrig(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: {
    SDValue Result = LowerLOAD(Op, DAG);
    assert((!Result.getNode() ||
            Result.getNode()->getNumValues() == 2) &&
           "Load should return a value and a chain");
    return Result;
  }

  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MFI->LiveOuts.push_back(Reg);
      return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
    }
    case AMDGPUIntrinsic::R600_store_swizzle: {
      SDLoc DL(Op);
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, DL, MVT::i32), // SWZ_X
        DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
        DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
        DAG.getConstant(3, DL, MVT::i32)  // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::EXPORT, DL, Op.getValueType(), Args);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch (IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_interp_xy:
    case AMDGPUIntrinsic::R600_interp_zw: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      MachineSDNode *interp;
      SDValue RegisterINode = Op.getOperand(2);
      SDValue RegisterJNode = Op.getOperand(3);

      if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
            RegisterJNode, RegisterINode);
      return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
                         SDValue(interp, 0), SDValue(interp, 1));
    }
    case AMDGPUIntrinsic::r600_tex:
    case AMDGPUIntrinsic::r600_texc:
    case AMDGPUIntrinsic::r600_txl:
    case AMDGPUIntrinsic::r600_txlc:
    case AMDGPUIntrinsic::r600_txb:
    case AMDGPUIntrinsic::r600_txbc:
    case AMDGPUIntrinsic::r600_txf:
    case AMDGPUIntrinsic::r600_txq:
    case AMDGPUIntrinsic::r600_ddx:
    case AMDGPUIntrinsic::r600_ddy:
    case AMDGPUIntrinsic::R600_ldptr: {
      unsigned TextureOp;
      switch (IntrinsicID) {
      case AMDGPUIntrinsic::r600_tex:
        TextureOp = 0;
        break;
      case AMDGPUIntrinsic::r600_texc:
        TextureOp = 1;
        break;
      case AMDGPUIntrinsic::r600_txl:
        TextureOp = 2;
        break;
      case AMDGPUIntrinsic::r600_txlc:
        TextureOp = 3;
        break;
      case AMDGPUIntrinsic::r600_txb:
        TextureOp = 4;
        break;
      case AMDGPUIntrinsic::r600_txbc:
        TextureOp = 5;
        break;
      case AMDGPUIntrinsic::r600_txf:
        TextureOp = 6;
        break;
      case AMDGPUIntrinsic::r600_txq:
        TextureOp = 7;
        break;
      case AMDGPUIntrinsic::r600_ddx:
        TextureOp = 8;
        break;
      case AMDGPUIntrinsic::r600_ddy:
        TextureOp = 9;
        break;
      case AMDGPUIntrinsic::R600_ldptr:
        TextureOp = 10;
        break;
      default:
        llvm_unreachable("Unknown Texture Operation");
      }

      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, DL, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(1, DL, MVT::i32),
        DAG.getConstant(2, DL, MVT::i32),
        DAG.getConstant(3, DL, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(1, DL, MVT::i32),
        DAG.getConstant(2, DL, MVT::i32),
        DAG.getConstant(3, DL, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
    }
    case AMDGPUIntrinsic::AMDGPU_dp4: {
      SDValue Args[8] = {
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
                    DAG.getConstant(0, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
                    DAG.getConstant(0, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
                    DAG.getConstant(1, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
                    DAG.getConstant(1, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
                    DAG.getConstant(2, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
                    DAG.getConstant(2, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
                    DAG.getConstant(3, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
                    DAG.getConstant(3, DL, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
    }

    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    case Intrinsic::r600_read_workdim:
    case AMDGPUIntrinsic::AMDGPU_read_workdim: { // Legacy name.
      uint32_t ByteOffset = getImplicitParameterOffset(MFI, GRID_DIM);
      return LowerImplicitParameter(DAG, VT, DL, ByteOffset / 4);
    }

    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);

    // FIXME: Should be renamed to r600 prefix
    case Intrinsic::AMDGPU_rsq_clamped:
      return DAG.getNode(AMDGPUISD::RSQ_CLAMPED, DL, VT, Op.getOperand(1));

    case Intrinsic::r600_rsq:
      // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
      return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
    return;
  case ISD::FP_TO_UINT:
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
      return;
    }
    // Fall-through. Since we don't care about out-of-bounds values we can use
    // FP_TO_SINT for uints too. The DAGLegalizer code for uint considers some
    // extra cases which are not necessary here.
  case ISD::FP_TO_SINT: {
    SDValue Result;
    if (expandFP_TO_SINT(N, Result, DAG))
      Results.push_back(Result);
    return;
  }
  case ISD::SDIVREM: {
    SDValue Op = SDValue(N, 1);
    SDValue RES = LowerSDIVREM(Op, DAG);
    Results.push_back(RES);
    Results.push_back(RES.getValue(1));
    break;
  }
  case ISD::UDIVREM: {
    SDValue Op = SDValue(N, 0);
    LowerUDIVREM64(Op, DAG, Results);
    break;
  }
  }
}

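// Rebuilds a vector value as an AMDGPUISD::BUILD_VERTICAL_VECTOR of its
// extracted elements. The EXTRACT/INSERT_VECTOR_ELT lowerings below only fall
// back to this when the element index is not a compile-time constant, so that
// the variable index can still be resolved.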
SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
                                                   SDValue Vector) const {

  SDLoc DL(Vector);
  EVT VecVT = Vector.getValueType();
  EVT EltVT = VecVT.getVectorElementType();
  SmallVector<SDValue, 8> Args;

  for (unsigned i = 0, e = VecVT.getVectorNumElements();
       i != e; ++i) {
    Args.push_back(DAG.getNode(
        ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
        DAG.getConstant(i, DL, getVectorIdxTy(DAG.getDataLayout()))));
  }

  return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
}

SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                    SelectionDAG &DAG) const {

  SDLoc DL(Op);
  SDValue Vector = Op.getOperand(0);
  SDValue Index = Op.getOperand(1);

  if (isa<ConstantSDNode>(Index) ||
      Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    return Op;

  Vector = vectorToVerticalVector(DAG, Vector);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
                     Vector, Index);
}

SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vector = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Index = Op.getOperand(2);

  if (isa<ConstantSDNode>(Index) ||
      Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    return Op;

  Vector = vectorToVerticalVector(DAG, Vector);
  SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
                               Vector, Value, Index);
  return vectorToVerticalVector(DAG, Insert);
}

SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
  // On hw >= R700, COS/SIN input must be between -1. and 1.
  // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
  EVT VT = Op.getValueType();
  SDValue Arg = Op.getOperand(0);
  SDLoc DL(Op);

  // TODO: Should this propagate fast-math-flags?
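  // 0.15915494309f below is 1/(2*pi): the FMUL/FADD compute x/(2*pi) + 0.5,
  // FRACT wraps that into [0, 1), and the later subtraction of 0.5 recenters
  // the value to [-0.5, 0.5) before it is fed to the hardware COS/SIN node.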
  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT,
        DAG.getNode(ISD::FMUL, DL, VT, Arg,
          DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
        DAG.getConstantFP(0.5, DL, MVT::f32)));
  unsigned TrigNode;
  switch (Op.getOpcode()) {
  case ISD::FCOS:
    TrigNode = AMDGPUISD::COS_HW;
    break;
  case ISD::FSIN:
    TrigNode = AMDGPUISD::SIN_HW;
    break;
  default:
    llvm_unreachable("Wrong trig opcode");
  }
  SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT, FractPart,
        DAG.getConstantFP(-0.5, DL, MVT::f32)));
  if (Gen >= AMDGPUSubtarget::R700)
    return TrigVal;
  // On R600 hw, COS/SIN input must be between -Pi and Pi.
  return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
                     DAG.getConstantFP(3.14159265359, DL, MVT::f32));
}

SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);

  SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for the 0 special case. Without it
  // the CompShift might be 32, producing incorrect results in Overflow. So we
  // do the shift in two steps; the alternative would be to add a conditional
  // to filter out the special case.

  SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
  Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);

  SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
  HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
  SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);

  SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
  SDValue LoBig = Zero;

  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Lo, Hi);
}

SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);

  const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;

  SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for the 0 special case. Without it
  // the CompShift might be 32, producing incorrect results in Overflow. So we
  // do the shift in two steps; the alternative would be to add a conditional
  // to filter out the special case.

  SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
  Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);

  SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
  SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
  LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);

  SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
  SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;

  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Lo, Hi);
}

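// Lowers UADDO/USUBO: the result is computed with the plain ADD/SUB (mainop)
// and the overflow bit with the hardware CARRY/BORROW node (ovf), which is
// then sign-extended from i1; both values are returned as a merged pair.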
SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
                                          unsigned mainop, unsigned ovf) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);

  SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
  // Extend sign.
  OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
                    DAG.getValueType(MVT::i1));

  SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
}

SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(
      ISD::SETCC,
      DL,
      MVT::i1,
      Op, DAG.getConstantFP(0.0f, DL, MVT::f32),
      DAG.getCondCode(ISD::SETNE));
}

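// Implicit kernel parameters (ngroups, global/local sizes, work dimension)
// live at fixed dword offsets in constant buffer 0; this emits the 32-bit
// load for the requested dword.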
Tom Stellard75aadc22012-12-11 21:25:42 +00001048SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001049 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001050 unsigned DwordOffset) const {
1051 unsigned ByteOffset = DwordOffset * 4;
1052 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +00001053 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +00001054
1055 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
1056 assert(isInt<16>(ByteOffset));
1057
1058 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001059 DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
Tom Stellard75aadc22012-12-11 21:25:42 +00001060 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
1061 false, false, false, 0);
1062}
1063
Tom Stellard75aadc22012-12-11 21:25:42 +00001064bool R600TargetLowering::isZero(SDValue Op) const {
1065 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
1066 return Cst->isNullValue();
1067 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
1068 return CstFP->isZero();
1069 } else {
1070 return false;
1071 }
1072}
1073
1074SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001075 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001076 EVT VT = Op.getValueType();
1077
1078 SDValue LHS = Op.getOperand(0);
1079 SDValue RHS = Op.getOperand(1);
1080 SDValue True = Op.getOperand(2);
1081 SDValue False = Op.getOperand(3);
1082 SDValue CC = Op.getOperand(4);
1083 SDValue Temp;
1084
Matt Arsenault1e3a4eb2014-12-12 02:30:37 +00001085 if (VT == MVT::f32) {
1086 DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
1087 SDValue MinMax = CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
1088 if (MinMax)
1089 return MinMax;
1090 }
1091
Tom Stellard75aadc22012-12-11 21:25:42 +00001092 // LHS and RHS are guaranteed to be the same value type
1093 EVT CompareVT = LHS.getValueType();
1094
1095 // Check if we can lower this to a native operation.
1096
Tom Stellard2add82d2013-03-08 15:37:09 +00001097 // Try to lower to a SET* instruction:
1098 //
1099 // SET* can match the following patterns:
1100 //
Tom Stellardcd428182013-09-28 02:50:38 +00001101 // select_cc f32, f32, -1, 0, cc_supported
1102 // select_cc f32, f32, 1.0f, 0.0f, cc_supported
1103 // select_cc i32, i32, -1, 0, cc_supported
Tom Stellard2add82d2013-03-08 15:37:09 +00001104 //
1105
1106 // Move hardware True/False values to the correct operand.
Tom Stellardcd428182013-09-28 02:50:38 +00001107 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
1108 ISD::CondCode InverseCC =
1109 ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
Tom Stellard5694d302013-09-28 02:50:43 +00001110 if (isHWTrueValue(False) && isHWFalseValue(True)) {
1111 if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
1112 std::swap(False, True);
1113 CC = DAG.getCondCode(InverseCC);
1114 } else {
1115 ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
1116 if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
1117 std::swap(False, True);
1118 std::swap(LHS, RHS);
1119 CC = DAG.getCondCode(SwapInvCC);
1120 }
1121 }
Tom Stellard2add82d2013-03-08 15:37:09 +00001122 }
1123
1124 if (isHWTrueValue(True) && isHWFalseValue(False) &&
1125 (CompareVT == VT || VT == MVT::i32)) {
1126 // This can be matched by a SET* instruction.
1127 return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
1128 }
1129
Tom Stellard75aadc22012-12-11 21:25:42 +00001130 // Try to lower to a CND* instruction:
Tom Stellard2add82d2013-03-08 15:37:09 +00001131 //
1132 // CND* can match the following patterns:
1133 //
Tom Stellardcd428182013-09-28 02:50:38 +00001134 // select_cc f32, 0.0, f32, f32, cc_supported
1135 // select_cc f32, 0.0, i32, i32, cc_supported
1136 // select_cc i32, 0, f32, f32, cc_supported
1137 // select_cc i32, 0, i32, i32, cc_supported
Tom Stellard2add82d2013-03-08 15:37:09 +00001138 //
Tom Stellardcd428182013-09-28 02:50:38 +00001139
1140 // Try to move the zero value to the RHS
1141 if (isZero(LHS)) {
1142 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
1143 // Try swapping the operands
1144 ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
1145 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
1146 std::swap(LHS, RHS);
1147 CC = DAG.getCondCode(CCSwapped);
1148 } else {
1149 // Try inverting the conditon and then swapping the operands
1150 ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
1151 CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
1152 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
1153 std::swap(True, False);
1154 std::swap(LHS, RHS);
1155 CC = DAG.getCondCode(CCSwapped);
1156 }
1157 }
1158 }
1159 if (isZero(RHS)) {
1160 SDValue Cond = LHS;
1161 SDValue Zero = RHS;
Tom Stellard75aadc22012-12-11 21:25:42 +00001162 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
1163 if (CompareVT != VT) {
1164 // Bitcast True / False to the correct types. This will end up being
1165 // a nop, but it allows us to define only a single pattern in the
1166 // .TD files for each CND* instruction rather than having to have
1167 // one pattern for integer True/False and one for fp True/False
1168 True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
1169 False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
1170 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001171
1172 switch (CCOpcode) {
1173 case ISD::SETONE:
1174 case ISD::SETUNE:
1175 case ISD::SETNE:
Tom Stellard75aadc22012-12-11 21:25:42 +00001176 CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
1177 Temp = True;
1178 True = False;
1179 False = Temp;
1180 break;
1181 default:
1182 break;
1183 }
1184 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
1185 Cond, Zero,
1186 True, False,
1187 DAG.getCondCode(CCOpcode));
1188 return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
1189 }
1190
Tom Stellard75aadc22012-12-11 21:25:42 +00001191 // If we make it this for it means we have no native instructions to handle
1192 // this SELECT_CC, so we must lower it.
1193 SDValue HWTrue, HWFalse;
1194
1195 if (CompareVT == MVT::f32) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001196 HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
1197 HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
Tom Stellard75aadc22012-12-11 21:25:42 +00001198 } else if (CompareVT == MVT::i32) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001199 HWTrue = DAG.getConstant(-1, DL, CompareVT);
1200 HWFalse = DAG.getConstant(0, DL, CompareVT);
Tom Stellard75aadc22012-12-11 21:25:42 +00001201 }
1202 else {
Matt Arsenaulteaa3a7e2013-12-10 21:37:42 +00001203 llvm_unreachable("Unhandled value type in LowerSELECT_CC");
Tom Stellard75aadc22012-12-11 21:25:42 +00001204 }
1205
1206 // Lower this unsupported SELECT_CC into a combination of two supported
1207 // SELECT_CC operations.
1208 SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
1209
1210 return DAG.getNode(ISD::SELECT_CC, DL, VT,
1211 Cond, HWFalse,
1212 True, False,
1213 DAG.getCondCode(ISD::SETNE));
1214}
1215
Alp Tokercb402912014-01-24 17:20:08 +00001216/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001217/// convert these pointers to a register index. Each register holds
1218/// 16 bytes (4 x 32-bit sub-registers), but we need to take into account the
1219/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
1220/// for indirect addressing.
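/// For example, a StackWidth of 1 means only one of the four sub-registers is
/// used per slot, so the byte pointer is shifted right by 2 (divided by 4);
/// stack widths of 2 and 4 shift right by 3 and 4 respectively.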
1221SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1222 unsigned StackWidth,
1223 SelectionDAG &DAG) const {
1224 unsigned SRLPad;
1225 switch(StackWidth) {
1226 case 1:
1227 SRLPad = 2;
1228 break;
1229 case 2:
1230 SRLPad = 3;
1231 break;
1232 case 4:
1233 SRLPad = 4;
1234 break;
1235 default: llvm_unreachable("Invalid stack width");
1236 }
1237
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001238 SDLoc DL(Ptr);
1239 return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
1240 DAG.getConstant(SRLPad, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001241}
1242
1243void R600TargetLowering::getStackAddress(unsigned StackWidth,
1244 unsigned ElemIdx,
1245 unsigned &Channel,
1246 unsigned &PtrIncr) const {
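  // Map the element index onto a channel within the current register and a
  // pointer increment to the next register, based on how many of the four
  // sub-registers (StackWidth) each stack slot actually uses.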
1247 switch (StackWidth) {
1248 default:
1249 case 1:
1250 Channel = 0;
1251 if (ElemIdx > 0) {
1252 PtrIncr = 1;
1253 } else {
1254 PtrIncr = 0;
1255 }
1256 break;
1257 case 2:
1258 Channel = ElemIdx % 2;
1259 if (ElemIdx == 2) {
1260 PtrIncr = 1;
1261 } else {
1262 PtrIncr = 0;
1263 }
1264 break;
1265 case 4:
1266 Channel = ElemIdx;
1267 PtrIncr = 0;
1268 break;
1269 }
1270}
1271
Tom Stellard75aadc22012-12-11 21:25:42 +00001272SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001273 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001274 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1275 SDValue Chain = Op.getOperand(0);
1276 SDValue Value = Op.getOperand(1);
1277 SDValue Ptr = Op.getOperand(2);
1278
Tom Stellard2ffc3302013-08-26 15:05:44 +00001279 SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
Tom Stellardfbab8272013-08-16 01:12:11 +00001280 if (Result.getNode()) {
1281 return Result;
1282 }
1283
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001284 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
1285 if (StoreNode->isTruncatingStore()) {
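      // Truncating i8/i16 global stores are emitted as a mask-or (STORE_MSKOR):
      // the truncated value is shifted into its byte position within the
      // containing dword and paired with a mask covering only those bytes.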
1286 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001287 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001288 EVT MemVT = StoreNode->getMemoryVT();
1289 SDValue MaskConstant;
1290 if (MemVT == MVT::i8) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001291 MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001292 } else {
1293 assert(MemVT == MVT::i16);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001294 MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001295 }
1296 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001297 DAG.getConstant(2, DL, MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001298 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001299 DAG.getConstant(0x00000003, DL, VT));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001300 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1301 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001302 DAG.getConstant(3, DL, VT));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001303 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1304 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
1305 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1306 // vector instead.
1307 SDValue Src[4] = {
1308 ShiftedValue,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001309 DAG.getConstant(0, DL, MVT::i32),
1310 DAG.getConstant(0, DL, MVT::i32),
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001311 Mask
1312 };
Craig Topper48d114b2014-04-26 18:35:24 +00001313 SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001314 SDValue Args[3] = { Chain, Input, DWordAddr };
1315 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
Craig Topper206fcd42014-04-26 19:29:41 +00001316 Op->getVTList(), Args, MemVT,
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001317 StoreNode->getMemOperand());
1318 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
1319 Value.getValueType().bitsGE(MVT::i32)) {
1320 // Convert pointer from byte address to dword address.
1321 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1322 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001323 Ptr, DAG.getConstant(2, DL, MVT::i32)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001324
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001325 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
Matt Arsenaulteaa3a7e2013-12-10 21:37:42 +00001326 llvm_unreachable("Truncated and indexed stores not supported yet");
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001327 } else {
1328 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1329 }
1330 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001331 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001332 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001333
1334 EVT ValueVT = Value.getValueType();
1335
1336 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1337 return SDValue();
1338 }
1339
Tom Stellarde9373602014-01-22 19:24:14 +00001340 SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
1341 if (Ret.getNode()) {
1342 return Ret;
1343 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001344 // Lowering for indirect addressing
1345
1346 const MachineFunction &MF = DAG.getMachineFunction();
Eric Christopher7792e322015-01-30 23:24:40 +00001347 const AMDGPUFrameLowering *TFL =
1348 static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001349 unsigned StackWidth = TFL->getStackWidth(MF);
1350
1351 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1352
1353 if (ValueVT.isVector()) {
1354 unsigned NumElemVT = ValueVT.getVectorNumElements();
1355 EVT ElemVT = ValueVT.getVectorElementType();
Craig Topper48d114b2014-04-26 18:35:24 +00001356 SmallVector<SDValue, 4> Stores(NumElemVT);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001357
1358 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1359                                      "vector width in store");
1360
1361 for (unsigned i = 0; i < NumElemVT; ++i) {
1362 unsigned Channel, PtrIncr;
1363 getStackAddress(StackWidth, i, Channel, PtrIncr);
1364 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001365 DAG.getConstant(PtrIncr, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001366 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001367 Value, DAG.getConstant(i, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001368
1369 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1370 Chain, Elem, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001371 DAG.getTargetConstant(Channel, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001372 }
Craig Topper48d114b2014-04-26 18:35:24 +00001373 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001374 } else {
1375 if (ValueVT == MVT::i8) {
1376 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1377 }
1378 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001379 DAG.getTargetConstant(0, DL, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001380 }
1381
1382 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001383}
1384
Tom Stellard365366f2013-01-23 02:09:06 +00001385// Returns 512 + (kc_bank << 12).
1386static int
1387ConstantAddressBlock(unsigned AddressSpace) {
1388 switch (AddressSpace) {
1389 case AMDGPUAS::CONSTANT_BUFFER_0:
1390 return 512;
1391 case AMDGPUAS::CONSTANT_BUFFER_1:
1392 return 512 + 4096;
1393 case AMDGPUAS::CONSTANT_BUFFER_2:
1394 return 512 + 4096 * 2;
1395 case AMDGPUAS::CONSTANT_BUFFER_3:
1396 return 512 + 4096 * 3;
1397 case AMDGPUAS::CONSTANT_BUFFER_4:
1398 return 512 + 4096 * 4;
1399 case AMDGPUAS::CONSTANT_BUFFER_5:
1400 return 512 + 4096 * 5;
1401 case AMDGPUAS::CONSTANT_BUFFER_6:
1402 return 512 + 4096 * 6;
1403 case AMDGPUAS::CONSTANT_BUFFER_7:
1404 return 512 + 4096 * 7;
1405 case AMDGPUAS::CONSTANT_BUFFER_8:
1406 return 512 + 4096 * 8;
1407 case AMDGPUAS::CONSTANT_BUFFER_9:
1408 return 512 + 4096 * 9;
1409 case AMDGPUAS::CONSTANT_BUFFER_10:
1410 return 512 + 4096 * 10;
1411 case AMDGPUAS::CONSTANT_BUFFER_11:
1412 return 512 + 4096 * 11;
1413 case AMDGPUAS::CONSTANT_BUFFER_12:
1414 return 512 + 4096 * 12;
1415 case AMDGPUAS::CONSTANT_BUFFER_13:
1416 return 512 + 4096 * 13;
1417 case AMDGPUAS::CONSTANT_BUFFER_14:
1418 return 512 + 4096 * 14;
1419 case AMDGPUAS::CONSTANT_BUFFER_15:
1420 return 512 + 4096 * 15;
1421 default:
1422 return -1;
1423 }
1424}
1425
1426SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
1427{
1428 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001429 SDLoc DL(Op);
Tom Stellard365366f2013-01-23 02:09:06 +00001430 LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
1431 SDValue Chain = Op.getOperand(0);
1432 SDValue Ptr = Op.getOperand(1);
1433 SDValue LoweredLoad;
1434
Matt Arsenault8b03e6c2015-07-09 18:47:03 +00001435 if (SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG))
1436 return Ret;
Tom Stellarde9373602014-01-22 19:24:14 +00001437
Tom Stellard067c8152014-07-21 14:01:14 +00001438  // Lower constant address space loads of global variables.
1439 if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
Mehdi Aminia28d91d2015-03-10 02:37:25 +00001440 isa<GlobalVariable>(GetUnderlyingObject(
Mehdi Amini44ede332015-07-09 02:09:04 +00001441 LoadNode->getMemOperand()->getValue(), DAG.getDataLayout()))) {
Tom Stellard067c8152014-07-21 14:01:14 +00001442
Mehdi Amini44ede332015-07-09 02:09:04 +00001443 SDValue Ptr = DAG.getZExtOrTrunc(
1444 LoadNode->getBasePtr(), DL,
1445 getPointerTy(DAG.getDataLayout(), AMDGPUAS::PRIVATE_ADDRESS));
Tom Stellard067c8152014-07-21 14:01:14 +00001446 Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001447 DAG.getConstant(2, DL, MVT::i32));
Tom Stellard067c8152014-07-21 14:01:14 +00001448 return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op->getVTList(),
1449 LoadNode->getChain(), Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001450 DAG.getTargetConstant(0, DL, MVT::i32),
1451 Op.getOperand(2));
Tom Stellard067c8152014-07-21 14:01:14 +00001452 }
Tom Stellarde9373602014-01-22 19:24:14 +00001453
Tom Stellard35bb18c2013-08-26 15:06:04 +00001454 if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
1455 SDValue MergedValues[2] = {
Matt Arsenault83e60582014-07-24 17:10:35 +00001456 ScalarizeVectorLoad(Op, DAG),
Tom Stellard35bb18c2013-08-26 15:06:04 +00001457 Chain
1458 };
Craig Topper64941d92014-04-27 19:20:57 +00001459 return DAG.getMergeValues(MergedValues, DL);
Tom Stellard35bb18c2013-08-26 15:06:04 +00001460 }
1461
Tom Stellard365366f2013-01-23 02:09:06 +00001462 int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
Matt Arsenault00a0d6f2013-11-13 02:39:07 +00001463 if (ConstantBlock > -1 &&
1464 ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
1465 (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
Tom Stellard365366f2013-01-23 02:09:06 +00001466 SDValue Result;
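    // If the pointer is known at compile time, build the four channel
    // addresses directly; otherwise fall back to a dynamically indexed
    // constant fetch below.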
Nick Lewyckyaad475b2014-04-15 07:22:52 +00001467 if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
1468 isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
Matt Arsenaultef1a9502013-11-01 17:39:26 +00001469 isa<ConstantSDNode>(Ptr)) {
Tom Stellard365366f2013-01-23 02:09:06 +00001470 SDValue Slots[4];
1471 for (unsigned i = 0; i < 4; i++) {
1472         // We want the Const position encoded with the following formula:
1473         // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
1474         // const_index is Ptr computed by LLVM using an alignment of 16.
1475         // Thus we add ((512 + (kc_bank << 12)) + chan) * 4 here and
1476         // then divide by 4 at the ISel step.
1477 SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001478 DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
Tom Stellard365366f2013-01-23 02:09:06 +00001479 Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
1480 }
Tom Stellard0344cdf2013-08-01 15:23:42 +00001481 EVT NewVT = MVT::v4i32;
1482 unsigned NumElements = 4;
1483 if (VT.isVector()) {
1484 NewVT = VT;
1485 NumElements = VT.getVectorNumElements();
1486 }
Craig Topper48d114b2014-04-26 18:35:24 +00001487 Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT,
Craig Topper2d2aa0c2014-04-30 07:17:30 +00001488 makeArrayRef(Slots, NumElements));
Tom Stellard365366f2013-01-23 02:09:06 +00001489 } else {
Alp Tokerf907b892013-12-05 05:44:44 +00001490      // A non-constant ptr can't be folded; keep it as a v4f32 load.
Tom Stellard365366f2013-01-23 02:09:06 +00001491 Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001492 DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
1493 DAG.getConstant(4, DL, MVT::i32)),
1494 DAG.getConstant(LoadNode->getAddressSpace() -
1495 AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
Tom Stellard365366f2013-01-23 02:09:06 +00001496 );
1497 }
1498
1499 if (!VT.isVector()) {
1500 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001501 DAG.getConstant(0, DL, MVT::i32));
Tom Stellard365366f2013-01-23 02:09:06 +00001502 }
1503
1504 SDValue MergedValues[2] = {
Matt Arsenault7939acd2014-04-07 16:44:24 +00001505 Result,
1506 Chain
Tom Stellard365366f2013-01-23 02:09:06 +00001507 };
Craig Topper64941d92014-04-27 19:20:57 +00001508 return DAG.getMergeValues(MergedValues, DL);
Tom Stellard365366f2013-01-23 02:09:06 +00001509 }
1510
Matt Arsenault909d0c02013-10-30 23:43:29 +00001511  // For most operations, returning SDValue() will result in the node being
1512 // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
1513 // need to manually expand loads that may be legal in some address spaces and
1514 // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
1515 // compute shaders, since the data is sign extended when it is uploaded to the
1516  // buffer. However, SEXT loads from other address spaces are not supported, so
1517 // we need to expand them here.
Tom Stellard84021442013-07-23 01:48:24 +00001518 if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
1519 EVT MemVT = LoadNode->getMemoryVT();
1520 assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
Tom Stellard84021442013-07-23 01:48:24 +00001521 SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
1522 LoadNode->getPointerInfo(), MemVT,
1523 LoadNode->isVolatile(),
1524 LoadNode->isNonTemporal(),
Louis Gerbarg67474e32014-07-31 21:45:05 +00001525 LoadNode->isInvariant(),
Tom Stellard84021442013-07-23 01:48:24 +00001526 LoadNode->getAlignment());
Jan Veselyb670d372015-05-26 18:07:22 +00001527 SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
1528 DAG.getValueType(MemVT));
Tom Stellard84021442013-07-23 01:48:24 +00001529
Jan Veselyb670d372015-05-26 18:07:22 +00001530 SDValue MergedValues[2] = { Res, Chain };
Craig Topper64941d92014-04-27 19:20:57 +00001531 return DAG.getMergeValues(MergedValues, DL);
Tom Stellard84021442013-07-23 01:48:24 +00001532 }
1533
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001534 if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1535 return SDValue();
1536 }
1537
1538 // Lowering for indirect addressing
1539 const MachineFunction &MF = DAG.getMachineFunction();
Eric Christopher7792e322015-01-30 23:24:40 +00001540 const AMDGPUFrameLowering *TFL =
1541 static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001542 unsigned StackWidth = TFL->getStackWidth(MF);
1543
1544 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1545
1546 if (VT.isVector()) {
1547 unsigned NumElemVT = VT.getVectorNumElements();
1548 EVT ElemVT = VT.getVectorElementType();
1549 SDValue Loads[4];
1550
1551 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1552 "vector width in load");
1553
1554 for (unsigned i = 0; i < NumElemVT; ++i) {
1555 unsigned Channel, PtrIncr;
1556 getStackAddress(StackWidth, i, Channel, PtrIncr);
1557 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001558 DAG.getConstant(PtrIncr, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001559 Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
1560 Chain, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001561 DAG.getTargetConstant(Channel, DL, MVT::i32),
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001562 Op.getOperand(2));
1563 }
1564 for (unsigned i = NumElemVT; i < 4; ++i) {
1565 Loads[i] = DAG.getUNDEF(ElemVT);
1566 }
1567 EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
Craig Topper48d114b2014-04-26 18:35:24 +00001568 LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001569 } else {
1570 LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
1571 Chain, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001572 DAG.getTargetConstant(0, DL, MVT::i32), // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001573 Op.getOperand(2));
1574 }
1575
Matt Arsenault7939acd2014-04-07 16:44:24 +00001576 SDValue Ops[2] = {
1577 LoweredLoad,
1578 Chain
1579 };
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001580
Craig Topper64941d92014-04-27 19:20:57 +00001581 return DAG.getMergeValues(Ops, DL);
Tom Stellard365366f2013-01-23 02:09:06 +00001582}
Tom Stellard75aadc22012-12-11 21:25:42 +00001583
Matt Arsenault1d555c42014-06-23 18:00:55 +00001584SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1585 SDValue Chain = Op.getOperand(0);
1586 SDValue Cond = Op.getOperand(1);
1587 SDValue Jump = Op.getOperand(2);
1588
1589 return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1590 Chain, Jump, Cond);
1591}
1592
Tom Stellard75aadc22012-12-11 21:25:42 +00001593/// XXX Only kernel functions are supported, so we can assume for now that
1594/// every function is a kernel function, but in the future we should use
1595/// separate calling conventions for kernel and non-kernel functions.
1596SDValue R600TargetLowering::LowerFormalArguments(
1597 SDValue Chain,
1598 CallingConv::ID CallConv,
1599 bool isVarArg,
1600 const SmallVectorImpl<ISD::InputArg> &Ins,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001601 SDLoc DL, SelectionDAG &DAG,
Tom Stellard75aadc22012-12-11 21:25:42 +00001602 SmallVectorImpl<SDValue> &InVals) const {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001603 SmallVector<CCValAssign, 16> ArgLocs;
Eric Christopherb5217502014-08-06 18:45:26 +00001604 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1605 *DAG.getContext());
Vincent Lejeunef143af32013-11-11 22:10:24 +00001606 MachineFunction &MF = DAG.getMachineFunction();
Jan Veselye5121f32014-10-14 20:05:26 +00001607 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellardacfeebf2013-07-23 01:48:05 +00001608
Tom Stellardaf775432013-10-23 00:44:32 +00001609 SmallVector<ISD::InputArg, 8> LocalIns;
1610
Matt Arsenault209a7b92014-04-18 07:40:20 +00001611 getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);
Tom Stellardaf775432013-10-23 00:44:32 +00001612
1613 AnalyzeFormalArguments(CCInfo, LocalIns);
Tom Stellardacfeebf2013-07-23 01:48:05 +00001614
Tom Stellard1e803092013-07-23 01:48:18 +00001615 for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001616 CCValAssign &VA = ArgLocs[i];
Matt Arsenault74ef2772014-08-13 18:14:11 +00001617 const ISD::InputArg &In = Ins[i];
1618 EVT VT = In.VT;
1619 EVT MemVT = VA.getLocVT();
1620 if (!VT.isVector() && MemVT.isVector()) {
1621 // Get load source type if scalarized.
1622 MemVT = MemVT.getVectorElementType();
1623 }
Tom Stellard78e01292013-07-23 01:47:58 +00001624
Jan Veselye5121f32014-10-14 20:05:26 +00001625 if (MFI->getShaderType() != ShaderType::COMPUTE) {
Vincent Lejeunef143af32013-11-11 22:10:24 +00001626 unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
1627 SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
1628 InVals.push_back(Register);
1629 continue;
1630 }
1631
Tom Stellard75aadc22012-12-11 21:25:42 +00001632 PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Matt Arsenault74ef2772014-08-13 18:14:11 +00001633 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellardacfeebf2013-07-23 01:48:05 +00001634
Matt Arsenaultfae02982014-03-17 18:58:11 +00001635 // i64 isn't a legal type, so the register type used ends up as i32, which
1636 // isn't expected here. It attempts to create this sextload, but it ends up
1637 // being invalid. Somehow this seems to work with i64 arguments, but breaks
1638 // for <1 x i64>.
1639
Tom Stellardacfeebf2013-07-23 01:48:05 +00001640    // The first 36 bytes of the input buffer contain information about
1641 // thread group and global sizes.
Matt Arsenault74ef2772014-08-13 18:14:11 +00001642 ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
1643 if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
1644 // FIXME: This should really check the extload type, but the handling of
1645 // extload vector parameters seems to be broken.
Matt Arsenaulte1f030c2014-04-11 20:59:54 +00001646
Matt Arsenault74ef2772014-08-13 18:14:11 +00001647 // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
1648 Ext = ISD::SEXTLOAD;
1649 }
1650
1651 // Compute the offset from the value.
1652 // XXX - I think PartOffset should give you this, but it seems to give the
1653 // size of the register which isn't useful.
1654
Andrew Trick05938a52015-02-16 18:10:47 +00001655 unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
Matt Arsenault74ef2772014-08-13 18:14:11 +00001656 unsigned PartOffset = VA.getLocMemOffset();
Jan Veselye5121f32014-10-14 20:05:26 +00001657 unsigned Offset = 36 + VA.getLocMemOffset();
Matt Arsenault74ef2772014-08-13 18:14:11 +00001658
1659 MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
1660 SDValue Arg = DAG.getLoad(ISD::UNINDEXED, Ext, VT, DL, Chain,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001661 DAG.getConstant(Offset, DL, MVT::i32),
Matt Arsenault74ef2772014-08-13 18:14:11 +00001662 DAG.getUNDEF(MVT::i32),
1663 PtrInfo,
1664 MemVT, false, true, true, 4);
Matt Arsenault209a7b92014-04-18 07:40:20 +00001665
1666 // 4 is the preferred alignment for the CONSTANT memory space.
Tom Stellard75aadc22012-12-11 21:25:42 +00001667 InVals.push_back(Arg);
Jan Veselye5121f32014-10-14 20:05:26 +00001668 MFI->ABIArgOffset = Offset + MemVT.getStoreSize();
Tom Stellard75aadc22012-12-11 21:25:42 +00001669 }
1670 return Chain;
1671}
1672
Mehdi Amini44ede332015-07-09 02:09:04 +00001673EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1674 EVT VT) const {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001675 if (!VT.isVector())
1676 return MVT::i32;
Tom Stellard75aadc22012-12-11 21:25:42 +00001677 return VT.changeVectorElementTypeToInteger();
1678}
1679
Matt Arsenault209a7b92014-04-18 07:40:20 +00001680static SDValue CompactSwizzlableVector(
1681 SelectionDAG &DAG, SDValue VectorEntry,
1682 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001683 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1684 assert(RemapSwizzle.empty());
1685 SDValue NewBldVec[4] = {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001686 VectorEntry.getOperand(0),
1687 VectorEntry.getOperand(1),
1688 VectorEntry.getOperand(2),
1689 VectorEntry.getOperand(3)
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001690 };
1691
1692 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeunefa58a5f2013-10-13 17:56:10 +00001693 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1694      // We mask the write here to teach later passes that the ith element of this
1695      // vector is undef. Thus we can use it to reduce 128-bit register usage,
1696      // break false dependencies and additionally make assembly easier to read.
1697 RemapSwizzle[i] = 7; // SEL_MASK_WRITE
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001698 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1699 if (C->isZero()) {
1700 RemapSwizzle[i] = 4; // SEL_0
1701 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1702 } else if (C->isExactlyValue(1.0)) {
1703 RemapSwizzle[i] = 5; // SEL_1
1704 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1705 }
1706 }
1707
1708 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1709 continue;
1710 for (unsigned j = 0; j < i; j++) {
1711 if (NewBldVec[i] == NewBldVec[j]) {
1712 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1713 RemapSwizzle[i] = j;
1714 break;
1715 }
1716 }
1717 }
1718
1719 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001720 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001721}
1722
Benjamin Kramer193960c2013-06-11 13:32:25 +00001723static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1724 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001725 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1726 assert(RemapSwizzle.empty());
1727 SDValue NewBldVec[4] = {
1728 VectorEntry.getOperand(0),
1729 VectorEntry.getOperand(1),
1730 VectorEntry.getOperand(2),
1731 VectorEntry.getOperand(3)
1732 };
1733 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001734 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001735 RemapSwizzle[i] = i;
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001736 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1737 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1738 ->getZExtValue();
1739 if (i == Idx)
1740 isUnmovable[Idx] = true;
1741 }
1742 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001743
1744 for (unsigned i = 0; i < 4; i++) {
1745 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1746 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1747 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001748 if (isUnmovable[Idx])
1749 continue;
1750 // Swap i and Idx
1751 std::swap(NewBldVec[Idx], NewBldVec[i]);
1752 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1753 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001754 }
1755 }
1756
1757 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001758 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001759}
1760
1761
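// Fold constant 0.0/1.0 and duplicated channels of the build_vector into the
// swizzle selects, then reorder the remaining channels so that values produced
// by extract_vector_elt stay in their original lane where possible.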
1762SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001763 SDValue Swz[4], SelectionDAG &DAG,
1764 SDLoc DL) const {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001765 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1766 // Old -> New swizzle values
1767 DenseMap<unsigned, unsigned> SwizzleRemap;
1768
1769 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1770 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001771 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001772 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001773 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001774 }
1775
1776 SwizzleRemap.clear();
1777 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1778 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001779 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001780 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001781 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001782 }
1783
1784 return BuildVector;
1785}
1786
1787
Tom Stellard75aadc22012-12-11 21:25:42 +00001788//===----------------------------------------------------------------------===//
1789// Custom DAG Optimizations
1790//===----------------------------------------------------------------------===//
1791
1792SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1793 DAGCombinerInfo &DCI) const {
1794 SelectionDAG &DAG = DCI.DAG;
1795
1796 switch (N->getOpcode()) {
Tom Stellard50122a52014-04-07 19:45:41 +00001797 default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00001798 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1799 case ISD::FP_ROUND: {
1800 SDValue Arg = N->getOperand(0);
1801 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001802 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001803 Arg.getOperand(0));
1804 }
1805 break;
1806 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001807
1808 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1809 // (i32 select_cc f32, f32, -1, 0 cc)
1810 //
1811 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1812 // this to one of the SET*_DX10 instructions.
1813 case ISD::FP_TO_SINT: {
1814 SDValue FNeg = N->getOperand(0);
1815 if (FNeg.getOpcode() != ISD::FNEG) {
1816 return SDValue();
1817 }
1818 SDValue SelectCC = FNeg.getOperand(0);
1819 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1820 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1821 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1822 !isHWTrueValue(SelectCC.getOperand(2)) ||
1823 !isHWFalseValue(SelectCC.getOperand(3))) {
1824 return SDValue();
1825 }
1826
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001827 SDLoc dl(N);
1828 return DAG.getNode(ISD::SELECT_CC, dl, N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001829 SelectCC.getOperand(0), // LHS
1830 SelectCC.getOperand(1), // RHS
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001831 DAG.getConstant(-1, dl, MVT::i32), // True
1832 DAG.getConstant(0, dl, MVT::i32), // False
Tom Stellarde06163a2013-02-07 14:02:35 +00001833 SelectCC.getOperand(4)); // CC
1834
1835 break;
1836 }
Quentin Colombete2e05482013-07-30 00:27:16 +00001837
NAKAMURA Takumi8a046432013-10-28 04:07:38 +00001838 // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1839 // => build_vector elt0, ... , NewEltIdx, ... , eltN
Quentin Colombete2e05482013-07-30 00:27:16 +00001840 case ISD::INSERT_VECTOR_ELT: {
1841 SDValue InVec = N->getOperand(0);
1842 SDValue InVal = N->getOperand(1);
1843 SDValue EltNo = N->getOperand(2);
1844 SDLoc dl(N);
1845
1846 // If the inserted element is an UNDEF, just use the input vector.
1847 if (InVal.getOpcode() == ISD::UNDEF)
1848 return InVec;
1849
1850 EVT VT = InVec.getValueType();
1851
1852 // If we can't generate a legal BUILD_VECTOR, exit
1853 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1854 return SDValue();
1855
1856 // Check that we know which element is being inserted
1857 if (!isa<ConstantSDNode>(EltNo))
1858 return SDValue();
1859 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1860
1861 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1862 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1863 // vector elements.
1864 SmallVector<SDValue, 8> Ops;
1865 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1866 Ops.append(InVec.getNode()->op_begin(),
1867 InVec.getNode()->op_end());
1868 } else if (InVec.getOpcode() == ISD::UNDEF) {
1869 unsigned NElts = VT.getVectorNumElements();
1870 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1871 } else {
1872 return SDValue();
1873 }
1874
1875 // Insert the element
1876 if (Elt < Ops.size()) {
1877 // All the operands of BUILD_VECTOR must have the same type;
1878 // we enforce that here.
1879 EVT OpVT = Ops[0].getValueType();
1880 if (InVal.getValueType() != OpVT)
1881 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1882 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
1883 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
1884 Ops[Elt] = InVal;
1885 }
1886
1887 // Return the new vector
Craig Topper48d114b2014-04-26 18:35:24 +00001888 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
Quentin Colombete2e05482013-07-30 00:27:16 +00001889 }
1890
Tom Stellard365366f2013-01-23 02:09:06 +00001891 // Extract_vec (Build_vector) generated by custom lowering
1892  // also needs to be custom combined.
1893 case ISD::EXTRACT_VECTOR_ELT: {
1894 SDValue Arg = N->getOperand(0);
1895 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1896 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1897 unsigned Element = Const->getZExtValue();
1898 return Arg->getOperand(Element);
1899 }
1900 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001901 if (Arg.getOpcode() == ISD::BITCAST &&
1902 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1903 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1904 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001905 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00001906 Arg->getOperand(0).getOperand(Element));
1907 }
1908 }
Mehdi Aminie029eae2015-07-16 06:23:12 +00001909 break;
Tom Stellard365366f2013-01-23 02:09:06 +00001910 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001911
1912 case ISD::SELECT_CC: {
Tom Stellardafa8b532014-05-09 16:42:16 +00001913 // Try common optimizations
1914 SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
1915 if (Ret.getNode())
1916 return Ret;
1917
Tom Stellarde06163a2013-02-07 14:02:35 +00001918 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1919 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001920 //
1921 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1922 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001923 SDValue LHS = N->getOperand(0);
1924 if (LHS.getOpcode() != ISD::SELECT_CC) {
1925 return SDValue();
1926 }
1927
1928 SDValue RHS = N->getOperand(1);
1929 SDValue True = N->getOperand(2);
1930 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001931 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001932
1933 if (LHS.getOperand(2).getNode() != True.getNode() ||
1934 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00001935 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00001936 return SDValue();
1937 }
1938
Tom Stellard5e524892013-03-08 15:37:11 +00001939 switch (NCC) {
1940 default: return SDValue();
1941 case ISD::SETNE: return LHS;
1942 case ISD::SETEQ: {
1943 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1944 LHSCC = ISD::getSetCCInverse(LHSCC,
1945 LHS.getOperand(0).getValueType().isInteger());
Tom Stellardcd428182013-09-28 02:50:38 +00001946 if (DCI.isBeforeLegalizeOps() ||
1947 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1948 return DAG.getSelectCC(SDLoc(N),
1949 LHS.getOperand(0),
1950 LHS.getOperand(1),
1951 LHS.getOperand(2),
1952 LHS.getOperand(3),
1953 LHSCC);
1954 break;
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001955 }
Tom Stellard5e524892013-03-08 15:37:11 +00001956 }
Tom Stellardcd428182013-09-28 02:50:38 +00001957 return SDValue();
Tom Stellard5e524892013-03-08 15:37:11 +00001958 }
Tom Stellardfbab8272013-08-16 01:12:11 +00001959
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001960 case AMDGPUISD::EXPORT: {
1961 SDValue Arg = N->getOperand(1);
1962 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1963 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001964
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001965 SDValue NewArgs[8] = {
1966 N->getOperand(0), // Chain
1967 SDValue(),
1968 N->getOperand(2), // ArrayBase
1969 N->getOperand(3), // Type
1970 N->getOperand(4), // SWZ_X
1971 N->getOperand(5), // SWZ_Y
1972 N->getOperand(6), // SWZ_Z
1973 N->getOperand(7) // SWZ_W
1974 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00001975 SDLoc DL(N);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001976 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
Craig Topper48d114b2014-04-26 18:35:24 +00001977 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
Tom Stellarde06163a2013-02-07 14:02:35 +00001978 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001979 case AMDGPUISD::TEXTURE_FETCH: {
1980 SDValue Arg = N->getOperand(1);
1981 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1982 break;
1983
1984 SDValue NewArgs[19] = {
1985 N->getOperand(0),
1986 N->getOperand(1),
1987 N->getOperand(2),
1988 N->getOperand(3),
1989 N->getOperand(4),
1990 N->getOperand(5),
1991 N->getOperand(6),
1992 N->getOperand(7),
1993 N->getOperand(8),
1994 N->getOperand(9),
1995 N->getOperand(10),
1996 N->getOperand(11),
1997 N->getOperand(12),
1998 N->getOperand(13),
1999 N->getOperand(14),
2000 N->getOperand(15),
2001 N->getOperand(16),
2002 N->getOperand(17),
2003 N->getOperand(18),
2004 };
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002005 SDLoc DL(N);
2006 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
2007 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002008 }
Tom Stellard75aadc22012-12-11 21:25:42 +00002009 }
Matt Arsenault5565f65e2014-05-22 18:09:07 +00002010
2011 return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00002012}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002013
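// Try to fold the node defining Src (an FNEG/FABS, a constant-buffer copy, or
// an immediate move) directly into the parent instruction's neg/abs/sel/literal
// operands. Returns true if the operand was folded.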
2014static bool
2015FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002016 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Eric Christopherfc6de422014-08-05 02:39:49 +00002017 const R600InstrInfo *TII =
2018 static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002019 if (!Src.isMachineOpcode())
2020 return false;
2021 switch (Src.getMachineOpcode()) {
2022 case AMDGPU::FNEG_R600:
2023 if (!Neg.getNode())
2024 return false;
2025 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002026 Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002027 return true;
2028 case AMDGPU::FABS_R600:
2029 if (!Abs.getNode())
2030 return false;
2031 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002032 Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002033 return true;
2034 case AMDGPU::CONST_COPY: {
2035 unsigned Opcode = ParentNode->getMachineOpcode();
2036 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2037
2038 if (!Sel.getNode())
2039 return false;
2040
2041 SDValue CstOffset = Src.getOperand(0);
2042 if (ParentNode->getValueType(0).isVector())
2043 return false;
2044
2045     // Gather constant values
2046 int SrcIndices[] = {
2047 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2048 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2049 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
2050 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2051 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2052 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2053 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2054 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2055 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2056 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2057 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
2058 };
2059 std::vector<unsigned> Consts;
Matt Arsenault4d64f962014-05-12 19:23:21 +00002060 for (int OtherSrcIdx : SrcIndices) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002061 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2062 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2063 continue;
2064 if (HasDst) {
2065 OtherSrcIdx--;
2066 OtherSelIdx--;
2067 }
2068 if (RegisterSDNode *Reg =
2069 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2070 if (Reg->getReg() == AMDGPU::ALU_CONST) {
Matt Arsenaultb3ee3882014-05-12 19:26:38 +00002071 ConstantSDNode *Cst
2072 = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002073 Consts.push_back(Cst->getZExtValue());
2074 }
2075 }
2076 }
2077
Matt Arsenault37c12d72014-05-12 20:42:57 +00002078 ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002079 Consts.push_back(Cst->getZExtValue());
2080 if (!TII->fitsConstReadLimitations(Consts)) {
2081 return false;
2082 }
2083
2084 Sel = CstOffset;
2085 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
2086 return true;
2087 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002088 case AMDGPU::MOV_IMM_I32:
2089 case AMDGPU::MOV_IMM_F32: {
2090 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
2091 uint64_t ImmValue = 0;
2092
2093
2094 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
2095 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
2096 float FloatValue = FPC->getValueAPF().convertToFloat();
2097 if (FloatValue == 0.0) {
2098 ImmReg = AMDGPU::ZERO;
2099 } else if (FloatValue == 0.5) {
2100 ImmReg = AMDGPU::HALF;
2101 } else if (FloatValue == 1.0) {
2102 ImmReg = AMDGPU::ONE;
2103 } else {
2104 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2105 }
2106 } else {
2107 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
2108 uint64_t Value = C->getZExtValue();
2109 if (Value == 0) {
2110 ImmReg = AMDGPU::ZERO;
2111 } else if (Value == 1) {
2112 ImmReg = AMDGPU::ONE_INT;
2113 } else {
2114 ImmValue = Value;
2115 }
2116 }
2117
2118 // Check that we aren't already using an immediate.
2119 // XXX: It's possible for an instruction to have more than one
2120 // immediate operand, but this is not supported yet.
2121 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
2122 if (!Imm.getNode())
2123 return false;
2124 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
2125 assert(C);
2126 if (C->getZExtValue())
2127 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002128 Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002129 }
2130 Src = DAG.getRegister(ImmReg, MVT::i32);
2131 return true;
2132 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002133 default:
2134 return false;
2135 }
2136}
2137
2138
2139/// \brief Fold the instructions after selecting them
2140SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2141 SelectionDAG &DAG) const {
Eric Christopherfc6de422014-08-05 02:39:49 +00002142 const R600InstrInfo *TII =
2143 static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002144 if (!Node->isMachineOpcode())
2145 return Node;
2146 unsigned Opcode = Node->getMachineOpcode();
2147 SDValue FakeOp;
2148
Benjamin Kramer6cd780f2015-02-17 15:29:18 +00002149 std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
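  // Work on a mutable copy of the operand list so FoldOperand can substitute
  // folded sources, modifiers and selects in place.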
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002150
2151 if (Opcode == AMDGPU::DOT_4) {
2152 int OperandIdx[] = {
2153 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2154 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2155 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2156 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2157 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2158 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2159 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2160 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00002161 };
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002162 int NegIdx[] = {
2163 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
2164 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
2165 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
2166 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
2167 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
2168 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
2169 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
2170 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
2171 };
2172 int AbsIdx[] = {
2173 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
2174 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
2175 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
2176 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
2177 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
2178 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
2179 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
2180 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
2181 };
2182 for (unsigned i = 0; i < 8; i++) {
2183 if (OperandIdx[i] < 0)
2184 return Node;
2185 SDValue &Src = Ops[OperandIdx[i] - 1];
2186 SDValue &Neg = Ops[NegIdx[i] - 1];
2187 SDValue &Abs = Ops[AbsIdx[i] - 1];
2188 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2189 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2190 if (HasDst)
2191 SelIdx--;
2192 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002193 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2194 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2195 }
2196 } else if (Opcode == AMDGPU::REG_SEQUENCE) {
2197 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2198 SDValue &Src = Ops[i];
2199 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002200 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2201 }
Vincent Lejeune0167a312013-09-12 23:45:00 +00002202 } else if (Opcode == AMDGPU::CLAMP_R600) {
2203 SDValue Src = Node->getOperand(0);
2204 if (!Src.isMachineOpcode() ||
2205 !TII->hasInstrModifiers(Src.getMachineOpcode()))
2206 return Node;
2207 int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
2208 AMDGPU::OpName::clamp);
2209 if (ClampIdx < 0)
2210 return Node;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002211 SDLoc DL(Node);
Benjamin Kramer6cd780f2015-02-17 15:29:18 +00002212 std::vector<SDValue> Ops(Src->op_begin(), Src->op_end());
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002213 Ops[ClampIdx - 1] = DAG.getTargetConstant(1, DL, MVT::i32);
2214 return DAG.getMachineNode(Src.getMachineOpcode(), DL,
2215 Node->getVTList(), Ops);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002216 } else {
2217 if (!TII->hasInstrModifiers(Opcode))
2218 return Node;
2219 int OperandIdx[] = {
2220 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2221 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2222 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
2223 };
2224 int NegIdx[] = {
2225 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
2226 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
2227 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
2228 };
2229 int AbsIdx[] = {
2230 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
2231 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
2232 -1
2233 };
2234 for (unsigned i = 0; i < 3; i++) {
2235 if (OperandIdx[i] < 0)
2236 return Node;
2237 SDValue &Src = Ops[OperandIdx[i] - 1];
2238 SDValue &Neg = Ops[NegIdx[i] - 1];
2239 SDValue FakeAbs;
2240 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2241 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2242 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002243 int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
2244 if (HasDst) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002245 SelIdx--;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002246 ImmIdx--;
2247 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002248 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002249 SDValue &Imm = Ops[ImmIdx];
2250 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002251 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2252 }
2253 }
2254
2255 return Node;
2256}