//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "AMDGPUFrameLowering.h"
#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Function.h"

using namespace llvm;

R600TargetLowering::R600TargetLowering(TargetMachine &TM,
                                       const AMDGPUSubtarget &STI)
    : AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) {
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
  addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);

  computeRegisterProperties(STI.getRegisterInfo());

  // Set condition code actions
  setCondCodeAction(ISD::SETO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::f32, Expand);

  setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::i32, Expand);

  setOperationAction(ISD::FCOS, MVT::f32, Custom);
  setOperationAction(ISD::FSIN, MVT::f32, Custom);

  setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
  setOperationAction(ISD::SETCC, MVT::v2i32, Expand);

  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
  setOperationAction(ISD::SELECT, MVT::v4i32, Expand);

  // ADD, SUB overflow.
  // TODO: turn these into Legal?
  if (Subtarget->hasCARRY())
    setOperationAction(ISD::UADDO, MVT::i32, Custom);

  if (Subtarget->hasBORROW())
    setOperationAction(ISD::USUBO, MVT::i32, Custom);

  // Expand sign extension of vectors
  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);

  // Legalize loads and stores to the private address space.
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);

  // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
  // spaces, so it is custom lowered to handle those where it isn't.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
  }

  setOperationAction(ISD::STORE, MVT::i8, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);
  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
  setTruncStoreAction(MVT::i32, MVT::i16, Custom);

  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);

  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);

  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::FP_TO_SINT);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::SELECT_CC);
  setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);

  // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
  // to be Legal/Custom in order to avoid library calls.
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);

  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
  for (MVT VT : ScalarIntVTs) {
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::SUBE, VT, Expand);
  }

  setSchedulingPreference(Sched::Source);
}

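// An instruction is "end of program" when it is immediately followed by a
// RETURN; the custom inserters below use this to decide whether to set the
// EOP bit on RAT write / export instructions.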
static inline bool isEOP(MachineBasicBlock::iterator I) {
  return std::next(I)->getOpcode() == AMDGPU::RETURN;
}

MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(Subtarget->getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // Replace LDS_*_RET instructions that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI->getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
      //        LDS_1A2D support and remove this special case.
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()) ||
          MI->getOpcode() == AMDGPU::LDS_CMPST_RET)
        return BB;

      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
                       MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(isEOP(I)); // Set End of program bit
    break;
  }
  case AMDGPU::RAT_STORE_TYPED_eg: {
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addOperand(MI->getOperand(2))
        .addImm(isEOP(I)); // Set End of program bit
    break;
  }

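  // TXD: texture sample with user-provided derivatives. It is expanded into
  // explicit gradient setup (TEX_SET_GRADIENTS_H/V) followed by the gradient
  // sample instruction.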
  case AMDGPU::TXD: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
        .addOperand(MI->getOperand(3))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
        .addOperand(MI->getOperand(2))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW)
        .addReg(T0, RegState::Implicit)
        .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
        .addOperand(MI->getOperand(3))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
        .addOperand(MI->getOperand(2))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW)
        .addReg(T0, RegState::Implicit)
        .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
        .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .addOperand(MI->getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO_INT)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .addOperand(MI->getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if it's not the last one of its type.
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = isEOP(I);
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz) ? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addOperand(MI->getOperand(2))
        .addOperand(MI->getOperand(3))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5))
        .addOperand(MI->getOperand(6))
        .addImm(CfInst)
        .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  MI->eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
  case ISD::SRA_PARTS:
  case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
  case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
  case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
  case ISD::FCOS:
  case ISD::FSIN: return LowerTrig(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: {
    SDValue Result = LowerLOAD(Op, DAG);
    assert((!Result.getNode() ||
            Result.getNode()->getNumValues() == 2) &&
           "Load should return a value and a chain");
    return Result;
  }

  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::R600_store_swizzle: {
      SDLoc DL(Op);
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, DL, MVT::i32), // SWZ_X
        DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
        DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
        DAG.getConstant(3, DL, MVT::i32)  // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::EXPORT, DL, Op.getValueType(), Args);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch (IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_interp_xy:
    case AMDGPUIntrinsic::R600_interp_zw: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      MachineSDNode *interp;
      SDValue RegisterINode = Op.getOperand(2);
      SDValue RegisterJNode = Op.getOperand(3);

      if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
            RegisterJNode, RegisterINode);
      return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
          SDValue(interp, 0), SDValue(interp, 1));
    }
    case AMDGPUIntrinsic::r600_tex:
    case AMDGPUIntrinsic::r600_texc:
    case AMDGPUIntrinsic::r600_txl:
    case AMDGPUIntrinsic::r600_txlc:
    case AMDGPUIntrinsic::r600_txb:
    case AMDGPUIntrinsic::r600_txbc:
    case AMDGPUIntrinsic::r600_txf:
    case AMDGPUIntrinsic::r600_txq:
    case AMDGPUIntrinsic::r600_ddx:
    case AMDGPUIntrinsic::r600_ddy:
    case AMDGPUIntrinsic::R600_ldptr: {
      unsigned TextureOp;
      switch (IntrinsicID) {
      case AMDGPUIntrinsic::r600_tex:
        TextureOp = 0;
        break;
      case AMDGPUIntrinsic::r600_texc:
        TextureOp = 1;
        break;
      case AMDGPUIntrinsic::r600_txl:
        TextureOp = 2;
        break;
      case AMDGPUIntrinsic::r600_txlc:
        TextureOp = 3;
        break;
      case AMDGPUIntrinsic::r600_txb:
        TextureOp = 4;
        break;
      case AMDGPUIntrinsic::r600_txbc:
        TextureOp = 5;
        break;
      case AMDGPUIntrinsic::r600_txf:
        TextureOp = 6;
        break;
      case AMDGPUIntrinsic::r600_txq:
        TextureOp = 7;
        break;
      case AMDGPUIntrinsic::r600_ddx:
        TextureOp = 8;
        break;
      case AMDGPUIntrinsic::r600_ddy:
        TextureOp = 9;
        break;
      case AMDGPUIntrinsic::R600_ldptr:
        TextureOp = 10;
        break;
      default:
        llvm_unreachable("Unknown Texture Operation");
      }

      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, DL, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(1, DL, MVT::i32),
        DAG.getConstant(2, DL, MVT::i32),
        DAG.getConstant(3, DL, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(1, DL, MVT::i32),
        DAG.getConstant(2, DL, MVT::i32),
        DAG.getConstant(3, DL, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
    }
    case AMDGPUIntrinsic::AMDGPU_dp4: {
      SDValue Args[8] = {
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(0, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(0, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(1, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(1, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(2, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(2, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(3, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(3, DL, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
    }

    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    case Intrinsic::r600_read_workdim:
    case AMDGPUIntrinsic::AMDGPU_read_workdim: { // Legacy name.
      uint32_t ByteOffset = getImplicitParameterOffset(MFI, GRID_DIM);
      return LowerImplicitParameter(DAG, VT, DL, ByteOffset / 4);
    }

    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);

    // FIXME: Should be renamed to r600 prefix
    case AMDGPUIntrinsic::AMDGPU_rsq_clamped:
      return DAG.getNode(AMDGPUISD::RSQ_CLAMPED, DL, VT, Op.getOperand(1));

    case Intrinsic::r600_rsq:
    case AMDGPUIntrinsic::AMDGPU_rsq: // Legacy name
      // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
      return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
    return;
  case ISD::FP_TO_UINT:
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
      return;
    }
    // Fall-through. Since we don't care about out-of-bounds values we can use
    // FP_TO_SINT for uints too. The DAGLegalizer code for uint considers some
    // extra cases which are not necessary here.
  case ISD::FP_TO_SINT: {
    SDValue Result;
    if (expandFP_TO_SINT(N, Result, DAG))
      Results.push_back(Result);
    return;
  }
  case ISD::SDIVREM: {
    SDValue Op = SDValue(N, 1);
    SDValue RES = LowerSDIVREM(Op, DAG);
    Results.push_back(RES);
    Results.push_back(RES.getValue(1));
    break;
  }
  case ISD::UDIVREM: {
    SDValue Op = SDValue(N, 0);
    LowerUDIVREM64(Op, DAG, Results);
    break;
  }
  }
}

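// Rebuild a vector value as an AMDGPUISD::BUILD_VERTICAL_VECTOR of its
// elements; the EXTRACT/INSERT_VECTOR_ELT lowerings below switch to this form
// when the element index is not a compile-time constant.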
SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
                                                   SDValue Vector) const {

  SDLoc DL(Vector);
  EVT VecVT = Vector.getValueType();
  EVT EltVT = VecVT.getVectorElementType();
  SmallVector<SDValue, 8> Args;

  for (unsigned i = 0, e = VecVT.getVectorNumElements();
       i != e; ++i) {
    Args.push_back(DAG.getNode(
        ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
        DAG.getConstant(i, DL, getVectorIdxTy(DAG.getDataLayout()))));
  }

  return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
}

SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                    SelectionDAG &DAG) const {

  SDLoc DL(Op);
  SDValue Vector = Op.getOperand(0);
  SDValue Index = Op.getOperand(1);

  if (isa<ConstantSDNode>(Index) ||
      Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    return Op;

  Vector = vectorToVerticalVector(DAG, Vector);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
                     Vector, Index);
}

SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vector = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Index = Op.getOperand(2);

  if (isa<ConstantSDNode>(Index) ||
      Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    return Op;

  Vector = vectorToVerticalVector(DAG, Vector);
  SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
                               Vector, Value, Index);
  return vectorToVerticalVector(DAG, Insert);
}

SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
  // On hw >= R700, COS/SIN input must be between -1. and 1.
  // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
  EVT VT = Op.getValueType();
  SDValue Arg = Op.getOperand(0);
  SDLoc DL(Op);

  // TODO: Should this propagate fast-math-flags?
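  // 0.15915494309 is 1 / (2 * pi): scale the argument so a full period maps
  // onto the unit range consumed by FRACT below.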
  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT,
        DAG.getNode(ISD::FMUL, DL, VT, Arg,
          DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
        DAG.getConstantFP(0.5, DL, MVT::f32)));
  unsigned TrigNode;
  switch (Op.getOpcode()) {
  case ISD::FCOS:
    TrigNode = AMDGPUISD::COS_HW;
    break;
  case ISD::FSIN:
    TrigNode = AMDGPUISD::SIN_HW;
    break;
  default:
    llvm_unreachable("Wrong trig opcode");
  }
  SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT, FractPart,
        DAG.getConstantFP(-0.5, DL, MVT::f32)));
  if (Gen >= AMDGPUSubtarget::R700)
    return TrigVal;
  // On R600 hw, COS/SIN input must be between -Pi and Pi.
  return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
      DAG.getConstantFP(3.14159265359, DL, MVT::f32));
}

SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);

  SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for the 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps, the alternative is to
  // add a conditional to filter the special case.

  SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
  Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);

  SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
  HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
  SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);

  SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
  SDValue LoBig = Zero;

  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Lo, Hi);
}

SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);

  const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;

  SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for the 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps, the alternative is to
  // add a conditional to filter the special case.

  SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
  Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);

  SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
  SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
  LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);

  SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
  SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;

  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Lo, Hi);
}

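// UADDO/USUBO: produce the plain ADD/SUB result together with a CARRY/BORROW
// node, whose single-bit result is sign-extended to form the overflow value.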
SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
                                          unsigned mainop, unsigned ovf) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);

  SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
  // Extend sign.
  OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
                    DAG.getValueType(MVT::i1));

  SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
}

SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(
      ISD::SETCC,
      DL,
      MVT::i1,
      Op, DAG.getConstantFP(0.0f, DL, MVT::f32),
      DAG.getCondCode(ISD::SETNE)
      );
}

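// Implicit kernel parameters (ngroups, global/local sizes, ...) live at fixed
// dword offsets in constant buffer 0; reads of them lower to loads from that
// buffer.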
Tom Stellard75aadc22012-12-11 21:25:42 +00001043SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001044 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001045 unsigned DwordOffset) const {
1046 unsigned ByteOffset = DwordOffset * 4;
1047 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +00001048 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +00001049
1050 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
1051 assert(isInt<16>(ByteOffset));
1052
1053 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001054 DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
Tom Stellard75aadc22012-12-11 21:25:42 +00001055 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
1056 false, false, false, 0);
1057}
1058
Tom Stellard75aadc22012-12-11 21:25:42 +00001059bool R600TargetLowering::isZero(SDValue Op) const {
1060 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
1061 return Cst->isNullValue();
1062 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
1063 return CstFP->isZero();
1064 } else {
1065 return false;
1066 }
1067}
1068
1069SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001070 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001071 EVT VT = Op.getValueType();
1072
1073 SDValue LHS = Op.getOperand(0);
1074 SDValue RHS = Op.getOperand(1);
1075 SDValue True = Op.getOperand(2);
1076 SDValue False = Op.getOperand(3);
1077 SDValue CC = Op.getOperand(4);
1078 SDValue Temp;
1079
Matt Arsenault1e3a4eb2014-12-12 02:30:37 +00001080 if (VT == MVT::f32) {
1081 DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
1082 SDValue MinMax = CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
1083 if (MinMax)
1084 return MinMax;
1085 }
1086
Tom Stellard75aadc22012-12-11 21:25:42 +00001087 // LHS and RHS are guaranteed to be the same value type
1088 EVT CompareVT = LHS.getValueType();
1089
1090 // Check if we can lower this to a native operation.
1091
Tom Stellard2add82d2013-03-08 15:37:09 +00001092 // Try to lower to a SET* instruction:
1093 //
1094 // SET* can match the following patterns:
1095 //
Tom Stellardcd428182013-09-28 02:50:38 +00001096 // select_cc f32, f32, -1, 0, cc_supported
1097 // select_cc f32, f32, 1.0f, 0.0f, cc_supported
1098 // select_cc i32, i32, -1, 0, cc_supported
Tom Stellard2add82d2013-03-08 15:37:09 +00001099 //
1100
1101 // Move hardware True/False values to the correct operand.
Tom Stellardcd428182013-09-28 02:50:38 +00001102 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
1103 ISD::CondCode InverseCC =
1104 ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
Tom Stellard5694d302013-09-28 02:50:43 +00001105 if (isHWTrueValue(False) && isHWFalseValue(True)) {
1106 if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
1107 std::swap(False, True);
1108 CC = DAG.getCondCode(InverseCC);
1109 } else {
1110 ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
1111 if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
1112 std::swap(False, True);
1113 std::swap(LHS, RHS);
1114 CC = DAG.getCondCode(SwapInvCC);
1115 }
1116 }
Tom Stellard2add82d2013-03-08 15:37:09 +00001117 }
1118
1119 if (isHWTrueValue(True) && isHWFalseValue(False) &&
1120 (CompareVT == VT || VT == MVT::i32)) {
1121 // This can be matched by a SET* instruction.
1122 return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
1123 }
1124
Tom Stellard75aadc22012-12-11 21:25:42 +00001125 // Try to lower to a CND* instruction:
Tom Stellard2add82d2013-03-08 15:37:09 +00001126 //
1127 // CND* can match the following patterns:
1128 //
Tom Stellardcd428182013-09-28 02:50:38 +00001129 // select_cc f32, 0.0, f32, f32, cc_supported
1130 // select_cc f32, 0.0, i32, i32, cc_supported
1131 // select_cc i32, 0, f32, f32, cc_supported
1132 // select_cc i32, 0, i32, i32, cc_supported
Tom Stellard2add82d2013-03-08 15:37:09 +00001133 //
Tom Stellardcd428182013-09-28 02:50:38 +00001134
1135 // Try to move the zero value to the RHS
1136 if (isZero(LHS)) {
1137 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
1138 // Try swapping the operands
1139 ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
1140 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
1141 std::swap(LHS, RHS);
1142 CC = DAG.getCondCode(CCSwapped);
1143 } else {
1144 // Try inverting the conditon and then swapping the operands
1145 ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
1146 CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
1147 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
1148 std::swap(True, False);
1149 std::swap(LHS, RHS);
1150 CC = DAG.getCondCode(CCSwapped);
1151 }
1152 }
1153 }
1154 if (isZero(RHS)) {
1155 SDValue Cond = LHS;
1156 SDValue Zero = RHS;
Tom Stellard75aadc22012-12-11 21:25:42 +00001157 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
1158 if (CompareVT != VT) {
1159 // Bitcast True / False to the correct types. This will end up being
1160 // a nop, but it allows us to define only a single pattern in the
1161 // .TD files for each CND* instruction rather than having to have
1162 // one pattern for integer True/False and one for fp True/False
1163 True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
1164 False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
1165 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001166
1167 switch (CCOpcode) {
1168 case ISD::SETONE:
1169 case ISD::SETUNE:
1170 case ISD::SETNE:
Tom Stellard75aadc22012-12-11 21:25:42 +00001171 CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
1172 Temp = True;
1173 True = False;
1174 False = Temp;
1175 break;
1176 default:
1177 break;
1178 }
1179 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
1180 Cond, Zero,
1181 True, False,
1182 DAG.getCondCode(CCOpcode));
1183 return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
1184 }
1185
Tom Stellard75aadc22012-12-11 21:25:42 +00001186 // If we make it this for it means we have no native instructions to handle
1187 // this SELECT_CC, so we must lower it.
1188 SDValue HWTrue, HWFalse;
1189
1190 if (CompareVT == MVT::f32) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001191 HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
1192 HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
Tom Stellard75aadc22012-12-11 21:25:42 +00001193 } else if (CompareVT == MVT::i32) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001194 HWTrue = DAG.getConstant(-1, DL, CompareVT);
1195 HWFalse = DAG.getConstant(0, DL, CompareVT);
Tom Stellard75aadc22012-12-11 21:25:42 +00001196 }
1197 else {
Matt Arsenaulteaa3a7e2013-12-10 21:37:42 +00001198 llvm_unreachable("Unhandled value type in LowerSELECT_CC");
Tom Stellard75aadc22012-12-11 21:25:42 +00001199 }
1200
1201 // Lower this unsupported SELECT_CC into a combination of two supported
1202 // SELECT_CC operations.
1203 SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
1204
1205 return DAG.getNode(ISD::SELECT_CC, DL, VT,
1206 Cond, HWFalse,
1207 True, False,
1208 DAG.getCondCode(ISD::SETNE));
1209}
1210
Alp Tokercb402912014-01-24 17:20:08 +00001211/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001212/// convert these pointers to a register index. Each register holds
 1213/// 16 bytes (4 x 32-bit sub-registers), but we need to take into account the
 1214/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
1215/// for indirect addressing.
1216SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1217 unsigned StackWidth,
1218 SelectionDAG &DAG) const {
1219 unsigned SRLPad;
1220 switch(StackWidth) {
1221 case 1:
1222 SRLPad = 2;
1223 break;
1224 case 2:
1225 SRLPad = 3;
1226 break;
1227 case 4:
1228 SRLPad = 4;
1229 break;
1230 default: llvm_unreachable("Invalid stack width");
1231 }
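  // With a stack width of 1, only one channel of each 128-bit register is
  // used, so the register index advances every 4 bytes (shift right by 2);
  // with a width of 2, every 8 bytes (shift by 3); with a width of 4 all
  // four channels are used and one register covers 16 bytes (shift by 4).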
1232
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001233 SDLoc DL(Ptr);
1234 return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
1235 DAG.getConstant(SRLPad, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001236}
1237
1238void R600TargetLowering::getStackAddress(unsigned StackWidth,
1239 unsigned ElemIdx,
1240 unsigned &Channel,
1241 unsigned &PtrIncr) const {
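  // Channel selects the sub-register of the element, and PtrIncr is the
  // register-index increment relative to the previous element; callers
  // accumulate it into the pointer between elements. For StackWidth == 2,
  // for example, elements 0 and 1 use channels 0 and 1 of one register,
  // element 2 moves on to the next register (channel 0), and element 3 uses
  // channel 1 of that same register.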
1242 switch (StackWidth) {
1243 default:
1244 case 1:
1245 Channel = 0;
1246 if (ElemIdx > 0) {
1247 PtrIncr = 1;
1248 } else {
1249 PtrIncr = 0;
1250 }
1251 break;
1252 case 2:
1253 Channel = ElemIdx % 2;
1254 if (ElemIdx == 2) {
1255 PtrIncr = 1;
1256 } else {
1257 PtrIncr = 0;
1258 }
1259 break;
1260 case 4:
1261 Channel = ElemIdx;
1262 PtrIncr = 0;
1263 break;
1264 }
1265}
1266
Tom Stellard75aadc22012-12-11 21:25:42 +00001267SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001268 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001269 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1270 SDValue Chain = Op.getOperand(0);
1271 SDValue Value = Op.getOperand(1);
1272 SDValue Ptr = Op.getOperand(2);
1273
Ahmed Bougachaf8dfb472016-02-09 22:54:12 +00001274 if (SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG))
Tom Stellardfbab8272013-08-16 01:12:11 +00001275 return Result;
Tom Stellardfbab8272013-08-16 01:12:11 +00001276
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001277 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
1278 if (StoreNode->isTruncatingStore()) {
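      // A global i8/i16 truncating store becomes a masked dword store
      // (STORE_MSKOR). Roughly, for a value V stored to byte pointer P:
      //   DWordAddr = P >> 2             (dword containing the byte)
      //   Shift     = (P & 3) * 8        (bit offset within that dword)
      //   Input     = (V & Mask) << Shift, with Mask = 0xFF or 0xFFFF
      // and the hardware merges Input into memory under Mask << Shift.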
1279 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001280 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001281 EVT MemVT = StoreNode->getMemoryVT();
1282 SDValue MaskConstant;
1283 if (MemVT == MVT::i8) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001284 MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001285 } else {
1286 assert(MemVT == MVT::i16);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001287 MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001288 }
1289 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001290 DAG.getConstant(2, DL, MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001291 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001292 DAG.getConstant(0x00000003, DL, VT));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001293 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1294 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001295 DAG.getConstant(3, DL, VT));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001296 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1297 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
1298 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1299 // vector instead.
1300 SDValue Src[4] = {
1301 ShiftedValue,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001302 DAG.getConstant(0, DL, MVT::i32),
1303 DAG.getConstant(0, DL, MVT::i32),
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001304 Mask
1305 };
Craig Topper48d114b2014-04-26 18:35:24 +00001306 SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001307 SDValue Args[3] = { Chain, Input, DWordAddr };
1308 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
Craig Topper206fcd42014-04-26 19:29:41 +00001309 Op->getVTList(), Args, MemVT,
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001310 StoreNode->getMemOperand());
1311 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
1312 Value.getValueType().bitsGE(MVT::i32)) {
1313 // Convert pointer from byte address to dword address.
1314 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1315 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001316 Ptr, DAG.getConstant(2, DL, MVT::i32)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001317
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001318 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
Matt Arsenaulteaa3a7e2013-12-10 21:37:42 +00001319 llvm_unreachable("Truncated and indexed stores not supported yet");
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001320 } else {
1321 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1322 }
1323 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001324 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001325 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001326
1327 EVT ValueVT = Value.getValueType();
1328
Ahmed Bougachaf8dfb472016-02-09 22:54:12 +00001329 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001330 return SDValue();
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001331
Ahmed Bougachaf8dfb472016-02-09 22:54:12 +00001332 if (SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG))
Tom Stellarde9373602014-01-22 19:24:14 +00001333 return Ret;
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001334
Ahmed Bougachaf8dfb472016-02-09 22:54:12 +00001335 // Lowering for indirect addressing
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001336 const MachineFunction &MF = DAG.getMachineFunction();
Eric Christopher7792e322015-01-30 23:24:40 +00001337 const AMDGPUFrameLowering *TFL =
1338 static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001339 unsigned StackWidth = TFL->getStackWidth(MF);
1340
1341 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1342
1343 if (ValueVT.isVector()) {
1344 unsigned NumElemVT = ValueVT.getVectorNumElements();
1345 EVT ElemVT = ValueVT.getVectorElementType();
Craig Topper48d114b2014-04-26 18:35:24 +00001346 SmallVector<SDValue, 4> Stores(NumElemVT);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001347
1348 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1349 "vector width in load");
1350
1351 for (unsigned i = 0; i < NumElemVT; ++i) {
1352 unsigned Channel, PtrIncr;
1353 getStackAddress(StackWidth, i, Channel, PtrIncr);
1354 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001355 DAG.getConstant(PtrIncr, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001356 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001357 Value, DAG.getConstant(i, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001358
1359 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1360 Chain, Elem, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001361 DAG.getTargetConstant(Channel, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001362 }
Craig Topper48d114b2014-04-26 18:35:24 +00001363 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001364 } else {
1365 if (ValueVT == MVT::i8) {
1366 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1367 }
1368 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001369 DAG.getTargetConstant(0, DL, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001370 }
1371
1372 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001373}
1374
Tom Stellard365366f2013-01-23 02:09:06 +00001375// return (512 + (kc_bank << 12))
1376static int
1377ConstantAddressBlock(unsigned AddressSpace) {
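  // Buffers are laid out 4096 apart starting at 512, so e.g.
  // CONSTANT_BUFFER_2 maps to 512 + 2 * 4096 = 8704.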
1378 switch (AddressSpace) {
1379 case AMDGPUAS::CONSTANT_BUFFER_0:
1380 return 512;
1381 case AMDGPUAS::CONSTANT_BUFFER_1:
1382 return 512 + 4096;
1383 case AMDGPUAS::CONSTANT_BUFFER_2:
1384 return 512 + 4096 * 2;
1385 case AMDGPUAS::CONSTANT_BUFFER_3:
1386 return 512 + 4096 * 3;
1387 case AMDGPUAS::CONSTANT_BUFFER_4:
1388 return 512 + 4096 * 4;
1389 case AMDGPUAS::CONSTANT_BUFFER_5:
1390 return 512 + 4096 * 5;
1391 case AMDGPUAS::CONSTANT_BUFFER_6:
1392 return 512 + 4096 * 6;
1393 case AMDGPUAS::CONSTANT_BUFFER_7:
1394 return 512 + 4096 * 7;
1395 case AMDGPUAS::CONSTANT_BUFFER_8:
1396 return 512 + 4096 * 8;
1397 case AMDGPUAS::CONSTANT_BUFFER_9:
1398 return 512 + 4096 * 9;
1399 case AMDGPUAS::CONSTANT_BUFFER_10:
1400 return 512 + 4096 * 10;
1401 case AMDGPUAS::CONSTANT_BUFFER_11:
1402 return 512 + 4096 * 11;
1403 case AMDGPUAS::CONSTANT_BUFFER_12:
1404 return 512 + 4096 * 12;
1405 case AMDGPUAS::CONSTANT_BUFFER_13:
1406 return 512 + 4096 * 13;
1407 case AMDGPUAS::CONSTANT_BUFFER_14:
1408 return 512 + 4096 * 14;
1409 case AMDGPUAS::CONSTANT_BUFFER_15:
1410 return 512 + 4096 * 15;
1411 default:
1412 return -1;
1413 }
1414}
1415
Matt Arsenault6dfda962016-02-10 18:21:39 +00001416SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
1417 SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001418 SDLoc DL(Op);
Matt Arsenault6dfda962016-02-10 18:21:39 +00001419 LoadSDNode *Load = cast<LoadSDNode>(Op);
1420 ISD::LoadExtType ExtType = Load->getExtensionType();
1421 EVT MemVT = Load->getMemoryVT();
Tom Stellard365366f2013-01-23 02:09:06 +00001422
Matt Arsenault6dfda962016-02-10 18:21:39 +00001423 // Pre-SI targets: a private-address extending load narrower than 32 bits
 1424 // is expanded into a 32-bit register load plus a byte extract.
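  // Sketch of the expansion for an extending i8 load from byte pointer P:
  //   Word   = REGISTER_LOAD(P >> 2)
  //   Byte   = Word >> ((P & 3) * 8)
  //   Result = sign- or zero-extension of the low bits of Byte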
1425
1426 // Get Register holding the target.
1427 SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Load->getBasePtr(),
1428 DAG.getConstant(2, DL, MVT::i32));
1429 // Load the Register.
1430 SDValue Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(),
1431 Load->getChain(),
1432 Ptr,
1433 DAG.getTargetConstant(0, DL, MVT::i32),
1434 Op.getOperand(2));
1435
1436 // Get offset within the register.
1437 SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
1438 Load->getBasePtr(),
1439 DAG.getConstant(0x3, DL, MVT::i32));
1440
1441 // Bit offset of target byte (byteIdx * 8).
1442 SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1443 DAG.getConstant(3, DL, MVT::i32));
1444
1445 // Shift to the right.
1446 Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Ret, ShiftAmt);
1447
1448 // Eliminate the upper bits by setting them to ...
1449 EVT MemEltVT = MemVT.getScalarType();
1450
1451 // ... ones.
1452 if (ExtType == ISD::SEXTLOAD) {
1453 SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
1454
1455 SDValue Ops[] = {
1456 DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode),
1457 Load->getChain()
1458 };
1459
1460 return DAG.getMergeValues(Ops, DL);
1461 }
1462
1463 // ... or zeros.
1464 SDValue Ops[] = {
1465 DAG.getZeroExtendInReg(Ret, DL, MemEltVT),
1466 Load->getChain()
1467 };
1468
1469 return DAG.getMergeValues(Ops, DL);
1470}
1471
1472SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1473 LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
1474 unsigned AS = LoadNode->getAddressSpace();
1475 EVT MemVT = LoadNode->getMemoryVT();
1476 ISD::LoadExtType ExtType = LoadNode->getExtensionType();
1477
1478 if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
1479 ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
1480 return lowerPrivateExtLoad(Op, DAG);
1481 }
1482
1483 SDLoc DL(Op);
1484 EVT VT = Op.getValueType();
1485 SDValue Chain = LoadNode->getChain();
1486 SDValue Ptr = LoadNode->getBasePtr();
Tom Stellarde9373602014-01-22 19:24:14 +00001487
Tom Stellard067c8152014-07-21 14:01:14 +00001488 // Lower constant address space loads of global variables
1489 if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
Mehdi Aminia28d91d2015-03-10 02:37:25 +00001490 isa<GlobalVariable>(GetUnderlyingObject(
Mehdi Amini44ede332015-07-09 02:09:04 +00001491 LoadNode->getMemOperand()->getValue(), DAG.getDataLayout()))) {
Tom Stellard067c8152014-07-21 14:01:14 +00001492
Mehdi Amini44ede332015-07-09 02:09:04 +00001493 SDValue Ptr = DAG.getZExtOrTrunc(
1494 LoadNode->getBasePtr(), DL,
1495 getPointerTy(DAG.getDataLayout(), AMDGPUAS::PRIVATE_ADDRESS));
Tom Stellard067c8152014-07-21 14:01:14 +00001496 Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001497 DAG.getConstant(2, DL, MVT::i32));
Tom Stellard067c8152014-07-21 14:01:14 +00001498 return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op->getVTList(),
1499 LoadNode->getChain(), Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001500 DAG.getTargetConstant(0, DL, MVT::i32),
1501 Op.getOperand(2));
Tom Stellard067c8152014-07-21 14:01:14 +00001502 }
Tom Stellarde9373602014-01-22 19:24:14 +00001503
Tom Stellard35bb18c2013-08-26 15:06:04 +00001504 if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
1505 SDValue MergedValues[2] = {
Matt Arsenault83e60582014-07-24 17:10:35 +00001506 ScalarizeVectorLoad(Op, DAG),
Tom Stellard35bb18c2013-08-26 15:06:04 +00001507 Chain
1508 };
Craig Topper64941d92014-04-27 19:20:57 +00001509 return DAG.getMergeValues(MergedValues, DL);
Tom Stellard35bb18c2013-08-26 15:06:04 +00001510 }
1511
Tom Stellard365366f2013-01-23 02:09:06 +00001512 int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
Matt Arsenault00a0d6f2013-11-13 02:39:07 +00001513 if (ConstantBlock > -1 &&
1514 ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
1515 (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
Tom Stellard365366f2013-01-23 02:09:06 +00001516 SDValue Result;
Nick Lewyckyaad475b2014-04-15 07:22:52 +00001517 if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
1518 isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
Matt Arsenaultef1a9502013-11-01 17:39:26 +00001519 isa<ConstantSDNode>(Ptr)) {
Tom Stellard365366f2013-01-23 02:09:06 +00001520 SDValue Slots[4];
1521 for (unsigned i = 0; i < 4; i++) {
1522 // We want Const position encoded with the following formula :
1523 // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
1524 // const_index is Ptr computed by llvm using an alignment of 16.
 1525 // Thus we add (4 * chan + (512 + (kc_bank << 12)) * 16) to Ptr here and
 1526 // then divide by 4 at the ISel step.
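        // For example, with kc_bank 0, Ptr == 32 and chan == 1 this adds
        // 4 + 512 * 16, and (32 + 4 + 8192) / 4 == ((512 + 2) << 2) + 1.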
1527 SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001528 DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
Tom Stellard365366f2013-01-23 02:09:06 +00001529 Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
1530 }
Tom Stellard0344cdf2013-08-01 15:23:42 +00001531 EVT NewVT = MVT::v4i32;
1532 unsigned NumElements = 4;
1533 if (VT.isVector()) {
1534 NewVT = VT;
1535 NumElements = VT.getVectorNumElements();
1536 }
Craig Topper48d114b2014-04-26 18:35:24 +00001537 Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT,
Craig Topper2d2aa0c2014-04-30 07:17:30 +00001538 makeArrayRef(Slots, NumElements));
Tom Stellard365366f2013-01-23 02:09:06 +00001539 } else {
Alp Tokerf907b892013-12-05 05:44:44 +00001540 // A non-constant pointer can't be folded; keep it as a full vector load
Tom Stellard365366f2013-01-23 02:09:06 +00001541 Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001542 DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
1543 DAG.getConstant(4, DL, MVT::i32)),
1544 DAG.getConstant(LoadNode->getAddressSpace() -
1545 AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
Tom Stellard365366f2013-01-23 02:09:06 +00001546 );
1547 }
1548
1549 if (!VT.isVector()) {
1550 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001551 DAG.getConstant(0, DL, MVT::i32));
Tom Stellard365366f2013-01-23 02:09:06 +00001552 }
1553
1554 SDValue MergedValues[2] = {
Matt Arsenault7939acd2014-04-07 16:44:24 +00001555 Result,
1556 Chain
Tom Stellard365366f2013-01-23 02:09:06 +00001557 };
Craig Topper64941d92014-04-27 19:20:57 +00001558 return DAG.getMergeValues(MergedValues, DL);
Tom Stellard365366f2013-01-23 02:09:06 +00001559 }
1560
Matt Arsenault6dfda962016-02-10 18:21:39 +00001561 SDValue LoweredLoad;
1562
Matt Arsenault909d0c02013-10-30 23:43:29 +00001563 // For most operations, returning SDValue() will result in the node being
1564 // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
1565 // need to manually expand loads that may be legal in some address spaces and
1566 // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
1567 // compute shaders, since the data is sign extended when it is uploaded to the
1568 // buffer. However SEXT loads from other address spaces are not supported, so
1569 // we need to expand them here.
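  // The expansion is an any-extending load of the memory type followed by a
  // SIGN_EXTEND_INREG of the result.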
Tom Stellard84021442013-07-23 01:48:24 +00001570 if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
1571 EVT MemVT = LoadNode->getMemoryVT();
1572 assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
Tom Stellard84021442013-07-23 01:48:24 +00001573 SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
1574 LoadNode->getPointerInfo(), MemVT,
1575 LoadNode->isVolatile(),
1576 LoadNode->isNonTemporal(),
Louis Gerbarg67474e32014-07-31 21:45:05 +00001577 LoadNode->isInvariant(),
Tom Stellard84021442013-07-23 01:48:24 +00001578 LoadNode->getAlignment());
Jan Veselyb670d372015-05-26 18:07:22 +00001579 SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
1580 DAG.getValueType(MemVT));
Tom Stellard84021442013-07-23 01:48:24 +00001581
Jan Veselyb670d372015-05-26 18:07:22 +00001582 SDValue MergedValues[2] = { Res, Chain };
Craig Topper64941d92014-04-27 19:20:57 +00001583 return DAG.getMergeValues(MergedValues, DL);
Tom Stellard84021442013-07-23 01:48:24 +00001584 }
1585
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001586 if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1587 return SDValue();
1588 }
1589
1590 // Lowering for indirect addressing
1591 const MachineFunction &MF = DAG.getMachineFunction();
Eric Christopher7792e322015-01-30 23:24:40 +00001592 const AMDGPUFrameLowering *TFL =
1593 static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001594 unsigned StackWidth = TFL->getStackWidth(MF);
1595
1596 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1597
1598 if (VT.isVector()) {
1599 unsigned NumElemVT = VT.getVectorNumElements();
1600 EVT ElemVT = VT.getVectorElementType();
1601 SDValue Loads[4];
1602
1603 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1604 "vector width in load");
1605
1606 for (unsigned i = 0; i < NumElemVT; ++i) {
1607 unsigned Channel, PtrIncr;
1608 getStackAddress(StackWidth, i, Channel, PtrIncr);
1609 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001610 DAG.getConstant(PtrIncr, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001611 Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
1612 Chain, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001613 DAG.getTargetConstant(Channel, DL, MVT::i32),
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001614 Op.getOperand(2));
1615 }
1616 for (unsigned i = NumElemVT; i < 4; ++i) {
1617 Loads[i] = DAG.getUNDEF(ElemVT);
1618 }
1619 EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
Craig Topper48d114b2014-04-26 18:35:24 +00001620 LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001621 } else {
1622 LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
1623 Chain, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001624 DAG.getTargetConstant(0, DL, MVT::i32), // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001625 Op.getOperand(2));
1626 }
1627
Matt Arsenault7939acd2014-04-07 16:44:24 +00001628 SDValue Ops[2] = {
1629 LoweredLoad,
1630 Chain
1631 };
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001632
Craig Topper64941d92014-04-27 19:20:57 +00001633 return DAG.getMergeValues(Ops, DL);
Tom Stellard365366f2013-01-23 02:09:06 +00001634}
Tom Stellard75aadc22012-12-11 21:25:42 +00001635
Matt Arsenault1d555c42014-06-23 18:00:55 +00001636SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1637 SDValue Chain = Op.getOperand(0);
1638 SDValue Cond = Op.getOperand(1);
1639 SDValue Jump = Op.getOperand(2);
1640
1641 return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1642 Chain, Jump, Cond);
1643}
1644
Tom Stellard75aadc22012-12-11 21:25:42 +00001645/// XXX Only kernel functions are supported, so we can assume for now that
1646/// every function is a kernel function, but in the future we should use
1647/// separate calling conventions for kernel and non-kernel functions.
1648SDValue R600TargetLowering::LowerFormalArguments(
1649 SDValue Chain,
1650 CallingConv::ID CallConv,
1651 bool isVarArg,
1652 const SmallVectorImpl<ISD::InputArg> &Ins,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001653 SDLoc DL, SelectionDAG &DAG,
Tom Stellard75aadc22012-12-11 21:25:42 +00001654 SmallVectorImpl<SDValue> &InVals) const {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001655 SmallVector<CCValAssign, 16> ArgLocs;
Eric Christopherb5217502014-08-06 18:45:26 +00001656 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1657 *DAG.getContext());
Vincent Lejeunef143af32013-11-11 22:10:24 +00001658 MachineFunction &MF = DAG.getMachineFunction();
Jan Veselye5121f32014-10-14 20:05:26 +00001659 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellardacfeebf2013-07-23 01:48:05 +00001660
Tom Stellardaf775432013-10-23 00:44:32 +00001661 SmallVector<ISD::InputArg, 8> LocalIns;
1662
Matt Arsenault209a7b92014-04-18 07:40:20 +00001663 getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);
Tom Stellardaf775432013-10-23 00:44:32 +00001664
1665 AnalyzeFormalArguments(CCInfo, LocalIns);
Tom Stellardacfeebf2013-07-23 01:48:05 +00001666
Tom Stellard1e803092013-07-23 01:48:18 +00001667 for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001668 CCValAssign &VA = ArgLocs[i];
Matt Arsenault74ef2772014-08-13 18:14:11 +00001669 const ISD::InputArg &In = Ins[i];
1670 EVT VT = In.VT;
1671 EVT MemVT = VA.getLocVT();
1672 if (!VT.isVector() && MemVT.isVector()) {
1673 // Get load source type if scalarized.
1674 MemVT = MemVT.getVectorElementType();
1675 }
Tom Stellard78e01292013-07-23 01:47:58 +00001676
Jan Veselye5121f32014-10-14 20:05:26 +00001677 if (MFI->getShaderType() != ShaderType::COMPUTE) {
Vincent Lejeunef143af32013-11-11 22:10:24 +00001678 unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
1679 SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
1680 InVals.push_back(Register);
1681 continue;
1682 }
1683
Tom Stellard75aadc22012-12-11 21:25:42 +00001684 PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Matt Arsenault74ef2772014-08-13 18:14:11 +00001685 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellardacfeebf2013-07-23 01:48:05 +00001686
Matt Arsenaultfae02982014-03-17 18:58:11 +00001687 // i64 isn't a legal type, so the register type used ends up as i32, which
1688 // isn't expected here. It attempts to create this sextload, but it ends up
1689 // being invalid. Somehow this seems to work with i64 arguments, but breaks
1690 // for <1 x i64>.
1691
Tom Stellardacfeebf2013-07-23 01:48:05 +00001692 // The first 36 bytes of the input buffer contain information about
1693 // thread group and global sizes.
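    // The first explicit kernel argument therefore lives at byte offset 36,
    // and each following one at 36 plus its assigned stack offset.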
Matt Arsenault74ef2772014-08-13 18:14:11 +00001694 ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
1695 if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
1696 // FIXME: This should really check the extload type, but the handling of
1697 // extload vector parameters seems to be broken.
Matt Arsenaulte1f030c2014-04-11 20:59:54 +00001698
Matt Arsenault74ef2772014-08-13 18:14:11 +00001699 // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
1700 Ext = ISD::SEXTLOAD;
1701 }
1702
1703 // Compute the offset from the value.
1704 // XXX - I think PartOffset should give you this, but it seems to give the
1705 // size of the register which isn't useful.
1706
Andrew Trick05938a52015-02-16 18:10:47 +00001707 unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
Matt Arsenault74ef2772014-08-13 18:14:11 +00001708 unsigned PartOffset = VA.getLocMemOffset();
Jan Veselye5121f32014-10-14 20:05:26 +00001709 unsigned Offset = 36 + VA.getLocMemOffset();
Matt Arsenault74ef2772014-08-13 18:14:11 +00001710
1711 MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
1712 SDValue Arg = DAG.getLoad(ISD::UNINDEXED, Ext, VT, DL, Chain,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001713 DAG.getConstant(Offset, DL, MVT::i32),
Matt Arsenault74ef2772014-08-13 18:14:11 +00001714 DAG.getUNDEF(MVT::i32),
1715 PtrInfo,
1716 MemVT, false, true, true, 4);
Matt Arsenault209a7b92014-04-18 07:40:20 +00001717
1718 // 4 is the preferred alignment for the CONSTANT memory space.
Tom Stellard75aadc22012-12-11 21:25:42 +00001719 InVals.push_back(Arg);
Jan Veselye5121f32014-10-14 20:05:26 +00001720 MFI->ABIArgOffset = Offset + MemVT.getStoreSize();
Tom Stellard75aadc22012-12-11 21:25:42 +00001721 }
1722 return Chain;
1723}
1724
Mehdi Amini44ede332015-07-09 02:09:04 +00001725EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1726 EVT VT) const {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001727 if (!VT.isVector())
1728 return MVT::i32;
Tom Stellard75aadc22012-12-11 21:25:42 +00001729 return VT.changeVectorElementTypeToInteger();
1730}
1731
Matt Arsenault209a7b92014-04-18 07:40:20 +00001732static SDValue CompactSwizzlableVector(
1733 SelectionDAG &DAG, SDValue VectorEntry,
1734 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001735 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1736 assert(RemapSwizzle.empty());
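  // RemapSwizzle records the swizzle select chosen for each output channel:
  // 0-3 pick an input channel, 4 is SEL_0 (constant 0.0), 5 is SEL_1
  // (constant 1.0), and 7 is SEL_MASK_WRITE (the channel is not written).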
1737 SDValue NewBldVec[4] = {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001738 VectorEntry.getOperand(0),
1739 VectorEntry.getOperand(1),
1740 VectorEntry.getOperand(2),
1741 VectorEntry.getOperand(3)
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001742 };
1743
1744 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeunefa58a5f2013-10-13 17:56:10 +00001745 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
 1746 // We mask the write here to teach later passes that the ith element of
 1747 // this vector is undef. Thus we can use it to reduce 128-bit register use,
 1748 // break false dependencies and additionally make assembly easier to read.
1749 RemapSwizzle[i] = 7; // SEL_MASK_WRITE
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001750 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1751 if (C->isZero()) {
1752 RemapSwizzle[i] = 4; // SEL_0
1753 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1754 } else if (C->isExactlyValue(1.0)) {
1755 RemapSwizzle[i] = 5; // SEL_1
1756 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1757 }
1758 }
1759
1760 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1761 continue;
1762 for (unsigned j = 0; j < i; j++) {
1763 if (NewBldVec[i] == NewBldVec[j]) {
1764 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1765 RemapSwizzle[i] = j;
1766 break;
1767 }
1768 }
1769 }
1770
1771 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001772 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001773}
1774
Benjamin Kramer193960c2013-06-11 13:32:25 +00001775static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1776 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001777 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1778 assert(RemapSwizzle.empty());
1779 SDValue NewBldVec[4] = {
1780 VectorEntry.getOperand(0),
1781 VectorEntry.getOperand(1),
1782 VectorEntry.getOperand(2),
1783 VectorEntry.getOperand(3)
1784 };
1785 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001786 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001787 RemapSwizzle[i] = i;
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001788 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1789 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1790 ->getZExtValue();
1791 if (i == Idx)
1792 isUnmovable[Idx] = true;
1793 }
1794 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001795
1796 for (unsigned i = 0; i < 4; i++) {
1797 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1798 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1799 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001800 if (isUnmovable[Idx])
1801 continue;
1802 // Swap i and Idx
1803 std::swap(NewBldVec[Idx], NewBldVec[i]);
1804 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1805 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001806 }
1807 }
1808
1809 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001810 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001811}
1812
1813
1814SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001815 SDValue Swz[4], SelectionDAG &DAG,
1816 SDLoc DL) const {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001817 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1818 // Old -> New swizzle values
1819 DenseMap<unsigned, unsigned> SwizzleRemap;
1820
1821 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1822 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001823 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001824 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001825 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001826 }
1827
1828 SwizzleRemap.clear();
1829 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1830 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001831 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001832 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001833 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001834 }
1835
1836 return BuildVector;
1837}
1838
1839
Tom Stellard75aadc22012-12-11 21:25:42 +00001840//===----------------------------------------------------------------------===//
1841// Custom DAG Optimizations
1842//===----------------------------------------------------------------------===//
1843
1844SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1845 DAGCombinerInfo &DCI) const {
1846 SelectionDAG &DAG = DCI.DAG;
1847
1848 switch (N->getOpcode()) {
Tom Stellard50122a52014-04-07 19:45:41 +00001849 default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00001850 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1851 case ISD::FP_ROUND: {
1852 SDValue Arg = N->getOperand(0);
1853 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001854 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001855 Arg.getOperand(0));
1856 }
1857 break;
1858 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001859
1860 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1861 // (i32 select_cc f32, f32, -1, 0 cc)
1862 //
1863 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1864 // this to one of the SET*_DX10 instructions.
1865 case ISD::FP_TO_SINT: {
1866 SDValue FNeg = N->getOperand(0);
1867 if (FNeg.getOpcode() != ISD::FNEG) {
1868 return SDValue();
1869 }
1870 SDValue SelectCC = FNeg.getOperand(0);
1871 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1872 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1873 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1874 !isHWTrueValue(SelectCC.getOperand(2)) ||
1875 !isHWFalseValue(SelectCC.getOperand(3))) {
1876 return SDValue();
1877 }
1878
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001879 SDLoc dl(N);
1880 return DAG.getNode(ISD::SELECT_CC, dl, N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001881 SelectCC.getOperand(0), // LHS
1882 SelectCC.getOperand(1), // RHS
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001883 DAG.getConstant(-1, dl, MVT::i32), // True
1884 DAG.getConstant(0, dl, MVT::i32), // False
Tom Stellarde06163a2013-02-07 14:02:35 +00001885 SelectCC.getOperand(4)); // CC
1886
1887 break;
1888 }
Quentin Colombete2e05482013-07-30 00:27:16 +00001889
NAKAMURA Takumi8a046432013-10-28 04:07:38 +00001890 // insert_vector_elt (build_vector elt0, ... , eltN), NewElt, idx
 1891 // => build_vector elt0, ... , NewElt, ... , eltN
Quentin Colombete2e05482013-07-30 00:27:16 +00001892 case ISD::INSERT_VECTOR_ELT: {
1893 SDValue InVec = N->getOperand(0);
1894 SDValue InVal = N->getOperand(1);
1895 SDValue EltNo = N->getOperand(2);
1896 SDLoc dl(N);
1897
1898 // If the inserted element is an UNDEF, just use the input vector.
1899 if (InVal.getOpcode() == ISD::UNDEF)
1900 return InVec;
1901
1902 EVT VT = InVec.getValueType();
1903
1904 // If we can't generate a legal BUILD_VECTOR, exit
1905 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1906 return SDValue();
1907
1908 // Check that we know which element is being inserted
1909 if (!isa<ConstantSDNode>(EltNo))
1910 return SDValue();
1911 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1912
1913 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1914 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1915 // vector elements.
1916 SmallVector<SDValue, 8> Ops;
1917 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1918 Ops.append(InVec.getNode()->op_begin(),
1919 InVec.getNode()->op_end());
1920 } else if (InVec.getOpcode() == ISD::UNDEF) {
1921 unsigned NElts = VT.getVectorNumElements();
1922 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1923 } else {
1924 return SDValue();
1925 }
1926
1927 // Insert the element
1928 if (Elt < Ops.size()) {
1929 // All the operands of BUILD_VECTOR must have the same type;
1930 // we enforce that here.
1931 EVT OpVT = Ops[0].getValueType();
1932 if (InVal.getValueType() != OpVT)
1933 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1934 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
1935 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
1936 Ops[Elt] = InVal;
1937 }
1938
1939 // Return the new vector
Craig Topper48d114b2014-04-26 18:35:24 +00001940 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
Quentin Colombete2e05482013-07-30 00:27:16 +00001941 }
1942
Tom Stellard365366f2013-01-23 02:09:06 +00001943 // Extract_vec (Build_vector) generated by custom lowering
 1944 // also needs custom combining
1945 case ISD::EXTRACT_VECTOR_ELT: {
1946 SDValue Arg = N->getOperand(0);
1947 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1948 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1949 unsigned Element = Const->getZExtValue();
1950 return Arg->getOperand(Element);
1951 }
1952 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001953 if (Arg.getOpcode() == ISD::BITCAST &&
1954 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1955 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1956 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001957 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00001958 Arg->getOperand(0).getOperand(Element));
1959 }
1960 }
Mehdi Aminie029eae2015-07-16 06:23:12 +00001961 break;
Tom Stellard365366f2013-01-23 02:09:06 +00001962 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001963
1964 case ISD::SELECT_CC: {
Tom Stellardafa8b532014-05-09 16:42:16 +00001965 // Try common optimizations
Ahmed Bougachaf8dfb472016-02-09 22:54:12 +00001966 if (SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI))
Tom Stellardafa8b532014-05-09 16:42:16 +00001967 return Ret;
1968
Tom Stellarde06163a2013-02-07 14:02:35 +00001969 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1970 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001971 //
1972 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1973 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001974 SDValue LHS = N->getOperand(0);
1975 if (LHS.getOpcode() != ISD::SELECT_CC) {
1976 return SDValue();
1977 }
1978
1979 SDValue RHS = N->getOperand(1);
1980 SDValue True = N->getOperand(2);
1981 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001982 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001983
1984 if (LHS.getOperand(2).getNode() != True.getNode() ||
1985 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00001986 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00001987 return SDValue();
1988 }
1989
Tom Stellard5e524892013-03-08 15:37:11 +00001990 switch (NCC) {
1991 default: return SDValue();
1992 case ISD::SETNE: return LHS;
1993 case ISD::SETEQ: {
1994 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1995 LHSCC = ISD::getSetCCInverse(LHSCC,
1996 LHS.getOperand(0).getValueType().isInteger());
Tom Stellardcd428182013-09-28 02:50:38 +00001997 if (DCI.isBeforeLegalizeOps() ||
1998 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1999 return DAG.getSelectCC(SDLoc(N),
2000 LHS.getOperand(0),
2001 LHS.getOperand(1),
2002 LHS.getOperand(2),
2003 LHS.getOperand(3),
2004 LHSCC);
2005 break;
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002006 }
Tom Stellard5e524892013-03-08 15:37:11 +00002007 }
Tom Stellardcd428182013-09-28 02:50:38 +00002008 return SDValue();
Tom Stellard5e524892013-03-08 15:37:11 +00002009 }
Tom Stellardfbab8272013-08-16 01:12:11 +00002010
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002011 case AMDGPUISD::EXPORT: {
2012 SDValue Arg = N->getOperand(1);
2013 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2014 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002015
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002016 SDValue NewArgs[8] = {
2017 N->getOperand(0), // Chain
2018 SDValue(),
2019 N->getOperand(2), // ArrayBase
2020 N->getOperand(3), // Type
2021 N->getOperand(4), // SWZ_X
2022 N->getOperand(5), // SWZ_Y
2023 N->getOperand(6), // SWZ_Z
2024 N->getOperand(7) // SWZ_W
2025 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00002026 SDLoc DL(N);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002027 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
Craig Topper48d114b2014-04-26 18:35:24 +00002028 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
Tom Stellarde06163a2013-02-07 14:02:35 +00002029 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002030 case AMDGPUISD::TEXTURE_FETCH: {
2031 SDValue Arg = N->getOperand(1);
2032 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2033 break;
2034
2035 SDValue NewArgs[19] = {
2036 N->getOperand(0),
2037 N->getOperand(1),
2038 N->getOperand(2),
2039 N->getOperand(3),
2040 N->getOperand(4),
2041 N->getOperand(5),
2042 N->getOperand(6),
2043 N->getOperand(7),
2044 N->getOperand(8),
2045 N->getOperand(9),
2046 N->getOperand(10),
2047 N->getOperand(11),
2048 N->getOperand(12),
2049 N->getOperand(13),
2050 N->getOperand(14),
2051 N->getOperand(15),
2052 N->getOperand(16),
2053 N->getOperand(17),
2054 N->getOperand(18),
2055 };
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002056 SDLoc DL(N);
2057 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
2058 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002059 }
Tom Stellard75aadc22012-12-11 21:25:42 +00002060 }
Matt Arsenault5565f65e2014-05-22 18:09:07 +00002061
2062 return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00002063}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002064
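/// \brief Try to fold the node feeding \p Src into the operand itself:
/// FNEG_R600 / FABS_R600 become the neg / abs source modifiers, CONST_COPY
/// becomes an ALU_CONST read with the matching sel index, and MOV_IMM_*
/// becomes an inline constant register or the ALU_LITERAL_X immediate slot.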
2065static bool
2066FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002067 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Eric Christopherfc6de422014-08-05 02:39:49 +00002068 const R600InstrInfo *TII =
2069 static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002070 if (!Src.isMachineOpcode())
2071 return false;
2072 switch (Src.getMachineOpcode()) {
2073 case AMDGPU::FNEG_R600:
2074 if (!Neg.getNode())
2075 return false;
2076 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002077 Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002078 return true;
2079 case AMDGPU::FABS_R600:
2080 if (!Abs.getNode())
2081 return false;
2082 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002083 Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002084 return true;
2085 case AMDGPU::CONST_COPY: {
2086 unsigned Opcode = ParentNode->getMachineOpcode();
2087 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2088
2089 if (!Sel.getNode())
2090 return false;
2091
2092 SDValue CstOffset = Src.getOperand(0);
2093 if (ParentNode->getValueType(0).isVector())
2094 return false;
2095
 2096 // Gather constant values
2097 int SrcIndices[] = {
2098 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2099 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2100 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
2101 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2102 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2103 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2104 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2105 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2106 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2107 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2108 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
2109 };
2110 std::vector<unsigned> Consts;
Matt Arsenault4d64f962014-05-12 19:23:21 +00002111 for (int OtherSrcIdx : SrcIndices) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002112 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2113 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2114 continue;
2115 if (HasDst) {
2116 OtherSrcIdx--;
2117 OtherSelIdx--;
2118 }
2119 if (RegisterSDNode *Reg =
2120 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2121 if (Reg->getReg() == AMDGPU::ALU_CONST) {
Matt Arsenaultb3ee3882014-05-12 19:26:38 +00002122 ConstantSDNode *Cst
2123 = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002124 Consts.push_back(Cst->getZExtValue());
2125 }
2126 }
2127 }
2128
Matt Arsenault37c12d72014-05-12 20:42:57 +00002129 ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002130 Consts.push_back(Cst->getZExtValue());
2131 if (!TII->fitsConstReadLimitations(Consts)) {
2132 return false;
2133 }
2134
2135 Sel = CstOffset;
2136 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
2137 return true;
2138 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002139 case AMDGPU::MOV_IMM_I32:
2140 case AMDGPU::MOV_IMM_F32: {
2141 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
2142 uint64_t ImmValue = 0;
 2143
2145 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
2146 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
2147 float FloatValue = FPC->getValueAPF().convertToFloat();
2148 if (FloatValue == 0.0) {
2149 ImmReg = AMDGPU::ZERO;
2150 } else if (FloatValue == 0.5) {
2151 ImmReg = AMDGPU::HALF;
2152 } else if (FloatValue == 1.0) {
2153 ImmReg = AMDGPU::ONE;
2154 } else {
2155 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2156 }
2157 } else {
2158 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
2159 uint64_t Value = C->getZExtValue();
2160 if (Value == 0) {
2161 ImmReg = AMDGPU::ZERO;
2162 } else if (Value == 1) {
2163 ImmReg = AMDGPU::ONE_INT;
2164 } else {
2165 ImmValue = Value;
2166 }
2167 }
2168
2169 // Check that we aren't already using an immediate.
2170 // XXX: It's possible for an instruction to have more than one
2171 // immediate operand, but this is not supported yet.
2172 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
2173 if (!Imm.getNode())
2174 return false;
2175 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
2176 assert(C);
2177 if (C->getZExtValue())
2178 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002179 Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002180 }
2181 Src = DAG.getRegister(ImmReg, MVT::i32);
2182 return true;
2183 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002184 default:
2185 return false;
2186 }
2187}
2188
2189
2190/// \brief Fold the instructions after selecting them
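/// This moves neg / abs / clamp modifiers, constant-buffer selects and
/// inline immediates into the operand fields of the selected machine nodes.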
2191SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2192 SelectionDAG &DAG) const {
Eric Christopherfc6de422014-08-05 02:39:49 +00002193 const R600InstrInfo *TII =
2194 static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002195 if (!Node->isMachineOpcode())
2196 return Node;
2197 unsigned Opcode = Node->getMachineOpcode();
2198 SDValue FakeOp;
2199
Benjamin Kramer6cd780f2015-02-17 15:29:18 +00002200 std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002201
2202 if (Opcode == AMDGPU::DOT_4) {
2203 int OperandIdx[] = {
2204 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2205 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2206 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2207 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2208 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2209 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2210 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2211 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00002212 };
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002213 int NegIdx[] = {
2214 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
2215 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
2216 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
2217 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
2218 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
2219 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
2220 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
2221 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
2222 };
2223 int AbsIdx[] = {
2224 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
2225 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
2226 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
2227 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
2228 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
2229 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
2230 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
2231 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
2232 };
2233 for (unsigned i = 0; i < 8; i++) {
2234 if (OperandIdx[i] < 0)
2235 return Node;
2236 SDValue &Src = Ops[OperandIdx[i] - 1];
2237 SDValue &Neg = Ops[NegIdx[i] - 1];
2238 SDValue &Abs = Ops[AbsIdx[i] - 1];
2239 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2240 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2241 if (HasDst)
2242 SelIdx--;
2243 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002244 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2245 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2246 }
2247 } else if (Opcode == AMDGPU::REG_SEQUENCE) {
2248 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2249 SDValue &Src = Ops[i];
2250 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002251 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2252 }
Vincent Lejeune0167a312013-09-12 23:45:00 +00002253 } else if (Opcode == AMDGPU::CLAMP_R600) {
2254 SDValue Src = Node->getOperand(0);
2255 if (!Src.isMachineOpcode() ||
2256 !TII->hasInstrModifiers(Src.getMachineOpcode()))
2257 return Node;
2258 int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
2259 AMDGPU::OpName::clamp);
2260 if (ClampIdx < 0)
2261 return Node;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002262 SDLoc DL(Node);
Benjamin Kramer6cd780f2015-02-17 15:29:18 +00002263 std::vector<SDValue> Ops(Src->op_begin(), Src->op_end());
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002264 Ops[ClampIdx - 1] = DAG.getTargetConstant(1, DL, MVT::i32);
2265 return DAG.getMachineNode(Src.getMachineOpcode(), DL,
2266 Node->getVTList(), Ops);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002267 } else {
2268 if (!TII->hasInstrModifiers(Opcode))
2269 return Node;
2270 int OperandIdx[] = {
2271 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2272 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2273 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
2274 };
2275 int NegIdx[] = {
2276 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
2277 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
2278 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
2279 };
2280 int AbsIdx[] = {
2281 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
2282 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
2283 -1
2284 };
2285 for (unsigned i = 0; i < 3; i++) {
2286 if (OperandIdx[i] < 0)
2287 return Node;
2288 SDValue &Src = Ops[OperandIdx[i] - 1];
2289 SDValue &Neg = Ops[NegIdx[i] - 1];
2290 SDValue FakeAbs;
2291 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2292 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2293 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002294 int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
2295 if (HasDst) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002296 SelIdx--;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002297 ImmIdx--;
2298 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002299 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002300 SDValue &Imm = Ops[ImmIdx];
2301 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002302 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2303 }
2304 }
2305
2306 return Node;
2307}