blob: c8dcbe50260b5a1b2b5e5f0087f67706777a0365 [file] [log] [blame]
Tom Stellard75aadc22012-12-11 21:25:42 +00001//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Custom DAG lowering for R600
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000016#include "AMDGPUFrameLowering.h"
Matt Arsenaultc791f392014-06-23 18:00:31 +000017#include "AMDGPUIntrinsicInfo.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000018#include "AMDGPUSubtarget.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000019#include "R600Defines.h"
20#include "R600InstrInfo.h"
21#include "R600MachineFunctionInfo.h"
Tom Stellard067c8152014-07-21 14:01:14 +000022#include "llvm/Analysis/ValueTracking.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000023#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000024#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000025#include "llvm/CodeGen/MachineInstrBuilder.h"
26#include "llvm/CodeGen/MachineRegisterInfo.h"
27#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000028#include "llvm/IR/Argument.h"
29#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000030
31using namespace llvm;
32
/// Construct the R600 DAG lowering object: registers the legal register
/// classes, then declares which ISD operations are Legal / Expand / Custom
/// for this target so the SelectionDAG legalizer calls back into the
/// R600-specific lowering hooks below.
R600TargetLowering::R600TargetLowering(TargetMachine &TM,
                                       const AMDGPUSubtarget &STI)
    : AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) {
  // Scalars use the 32-bit register class, two-element vectors the 64-bit
  // class, four-element vectors the 128-bit class.
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
  addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);

  computeRegisterProperties(STI.getRegisterInfo());

  // Set condition code actions
  setCondCodeAction(ISD::SETO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::f32, Expand);

  setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::i32, Expand);

  // Custom-lowered via LowerTrig (see LowerOperation's FCOS/FSIN cases).
  setOperationAction(ISD::FCOS, MVT::f32, Custom);
  setOperationAction(ISD::FSIN, MVT::f32, Custom);

  setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
  setOperationAction(ISD::SETCC, MVT::v2i32, Expand);

  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  // Plain SELECT is expanded; it legalizes into SELECT_CC, which is Custom
  // above.
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
  setOperationAction(ISD::SELECT, MVT::v4i32, Expand);

  // ADD, SUB overflow.
  // TODO: turn these into Legal?
  if (Subtarget->hasCARRY())
    setOperationAction(ISD::UADDO, MVT::i32, Custom);

  if (Subtarget->hasBORROW())
    setOperationAction(ISD::USUBO, MVT::i32, Custom);

  // Expand sign extension of vectors.  Scalar i1/i8/i16 in-register
  // sign-extension is only kept non-Expand when the subtarget has the BFE
  // (bit-field extract) instruction.
  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);


  // Legalize loads and stores to the private address space.
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);

  // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
  // spaces, so it is custom lowered to handle those where it isn't.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
  }

  setOperationAction(ISD::STORE, MVT::i8, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);
  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
  setTruncStoreAction(MVT::i32, MVT::i16, Custom);

  // NOTE(review): LOAD for i32/v4i32 is already marked Custom a few lines
  // above; these two calls are harmless duplicates.
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);

  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);

  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

  // DAG-combine hooks handled in PerformDAGCombine.
  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::FP_TO_SINT);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::SELECT_CC);
  setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);

  // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
  // to be Legal/Custom in order to avoid library calls.
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);

  // No hardware add/sub with carry/borrow chains for scalar ints; expand the
  // carry-propagating forms.
  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
  for (MVT VT : ScalarIntVTs) {
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::SUBE, VT, Expand);
  }

  setSchedulingPreference(Sched::Source);
}
192
Tom Stellardc0f0fba2015-10-01 17:51:29 +0000193static inline bool isEOP(MachineBasicBlock::iterator I) {
194 return std::next(I)->getOpcode() == AMDGPU::RETURN;
195}
196
/// Expand pseudo machine instructions after instruction selection.
///
/// \param MI the pseudo instruction to expand; it is erased on every path
///        that falls out of the switch (cases that keep \p MI alive return
///        \p BB early instead).
/// \param BB the basic block containing \p MI.
/// \returns the basic block where subsequent insertion should continue
///          (always \p BB here — no new blocks are created).
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(Subtarget->getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // Replace LDS_*_RET instruction that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI->getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
      //        LDS_1A2D support and remove this special case.
      // If the result is still used (or the op has no NORET form), keep the
      // original instruction untouched.
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()) ||
          MI->getOpcode() == AMDGPU::LDS_CMPST_RET)
        return BB;

      // Rebuild as the NORET form, copying every operand except the dead
      // destination (operand 0).
      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  case AMDGPU::CLAMP_R600: {
    // Lower to a plain MOV carrying the CLAMP output modifier flag.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                        AMDGPU::MOV,
                                        MI->getOperand(0).getReg(),
                                        MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    // Lower to a MOV with the ABS source modifier flag.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                        AMDGPU::MOV,
                                        MI->getOperand(0).getReg(),
                                        MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    // Lower to a MOV with the NEG source modifier flag.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                        AMDGPU::MOV,
                                        MI->getOperand(0).getReg(),
                                        MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    // Instead of emitting anything, mark the instruction that defines the
    // masked register with the write-mask flag.
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    // Materialize the float immediate via its raw bit pattern.
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::MOV_IMM_GLOBAL_ADDR: {
    //TODO: Perhaps combine this instruction with the next if possible
    // Emit a MOV from the literal slot, then patch the literal operand to be
    // the global-address operand of the pseudo.
    auto MIB = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
                                            MI->getOperand(0).getReg(),
                                            AMDGPU::ALU_LITERAL_X);
    int Idx = TII->getOperandIdx(*MIB, AMDGPU::OpName::literal);
    //TODO: Ugh this is rather ugly
    MIB->getOperand(Idx) = MI->getOperand(1);
    break;
  }
  case AMDGPU::CONST_COPY: {
    // MOV from the constant file; the constant-buffer selector goes into the
    // src0_sel immediate operand.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
                       MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    // Re-emit the same opcode with an extra immediate operand.
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(isEOP(I)); // Set End of program bit
    break;
  }
  case AMDGPU::RAT_STORE_TYPED_eg: {
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addImm(isEOP(I)); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    // Texture sample with user-supplied derivatives: emit SET_GRADIENTS_H/V
    // to load the derivatives, then the actual gradient sample which uses
    // them implicitly.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);  // resource id
    MachineOperand &SID = MI->getOperand(5);  // sampler id
    unsigned TextureId = MI->getOperand(6).getImm();
    // Default source swizzle (x,y,z,w) and coordinate-type flags.
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    // Adjust swizzle/coordinate-type per texture target (shadow targets move
    // the comparison value into W; rect targets use unnormalized X/Y; array
    // targets use an unnormalized layer index).
    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    // The implicit uses of T0/T1 keep the gradient setup alive.
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    // Same expansion as TXD above, but ends in the shadow-compare sample
    // opcode TEX_SAMPLE_C_G.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);  // resource id
    MachineOperand &SID = MI->getOperand(5);  // sampler id
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    // Conditional branch: emit a PRED_X that sets PREDICATE_BIT when the
    // float condition is non-zero, then a predicated JUMP_COND that kills it.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    // Integer variant of the conditional-branch expansion above.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO_INT)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if its not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    // Scan the rest of the block for a later export of the same type.
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = isEOP(I);
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // 84/40 are the CF instruction encodings used for the export; they differ
    // between Evergreen and R600 — presumably CF_OP values, confirm against
    // the ISA docs.
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    // RETURN is kept (with added operands), so do not erase it below.
    return BB;
  }
  }

  // Every case that reached here replaced MI with new instructions.
  MI->eraseFromParent();
  return BB;
}
592
593//===----------------------------------------------------------------------===//
594// Custom DAG Lowering Operations
595//===----------------------------------------------------------------------===//
596
Tom Stellard75aadc22012-12-11 21:25:42 +0000597SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000598 MachineFunction &MF = DAG.getMachineFunction();
599 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard75aadc22012-12-11 21:25:42 +0000600 switch (Op.getOpcode()) {
601 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Tom Stellard880a80a2014-06-17 16:53:14 +0000602 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
603 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
Jan Vesely25f36272014-06-18 12:27:13 +0000604 case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
Jan Veselyecf51332014-06-18 12:27:17 +0000605 case ISD::SRA_PARTS:
Jan Vesely900ff2e2014-06-18 12:27:15 +0000606 case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
Jan Vesely808fff52015-04-30 17:15:56 +0000607 case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
608 case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000609 case ISD::FCOS:
610 case ISD::FSIN: return LowerTrig(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000611 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000612 case ISD::STORE: return LowerSTORE(Op, DAG);
Matt Arsenaultd2c9e082014-07-07 18:34:45 +0000613 case ISD::LOAD: {
614 SDValue Result = LowerLOAD(Op, DAG);
615 assert((!Result.getNode() ||
616 Result.getNode()->getNumValues() == 2) &&
617 "Load should return a value and a chain");
618 return Result;
619 }
620
Matt Arsenault1d555c42014-06-23 18:00:55 +0000621 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000622 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
Matt Arsenault81d06012016-03-07 21:10:13 +0000623 case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000624 case ISD::INTRINSIC_VOID: {
625 SDValue Chain = Op.getOperand(0);
626 unsigned IntrinsicID =
627 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
628 switch (IntrinsicID) {
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000629 case AMDGPUIntrinsic::R600_store_swizzle: {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000630 SDLoc DL(Op);
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000631 const SDValue Args[8] = {
632 Chain,
633 Op.getOperand(2), // Export Value
634 Op.getOperand(3), // ArrayBase
635 Op.getOperand(4), // Type
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000636 DAG.getConstant(0, DL, MVT::i32), // SWZ_X
637 DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
638 DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
639 DAG.getConstant(3, DL, MVT::i32) // SWZ_W
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000640 };
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000641 return DAG.getNode(AMDGPUISD::EXPORT, DL, Op.getValueType(), Args);
Tom Stellard75aadc22012-12-11 21:25:42 +0000642 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000643
Tom Stellard75aadc22012-12-11 21:25:42 +0000644 // default for switch(IntrinsicID)
645 default: break;
646 }
647 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
648 break;
649 }
650 case ISD::INTRINSIC_WO_CHAIN: {
651 unsigned IntrinsicID =
652 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
653 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000654 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000655 switch(IntrinsicID) {
656 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeunef143af32013-11-11 22:10:24 +0000657 case AMDGPUIntrinsic::R600_interp_xy:
658 case AMDGPUIntrinsic::R600_interp_zw: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000659 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Tom Stellard41afe6a2013-02-05 17:09:14 +0000660 MachineSDNode *interp;
Vincent Lejeunef143af32013-11-11 22:10:24 +0000661 SDValue RegisterINode = Op.getOperand(2);
662 SDValue RegisterJNode = Op.getOperand(3);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000663
Vincent Lejeunef143af32013-11-11 22:10:24 +0000664 if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
Tom Stellard41afe6a2013-02-05 17:09:14 +0000665 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000666 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000667 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000668 else
669 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000670 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000671 RegisterJNode, RegisterINode);
Ahmed Bougacha128f8732016-04-26 21:15:30 +0000672 return DAG.getBuildVector(MVT::v2f32, DL,
673 {SDValue(interp, 0), SDValue(interp, 1)});
Tom Stellard75aadc22012-12-11 21:25:42 +0000674 }
Matt Arsenault59bd3012016-01-22 19:00:09 +0000675 case AMDGPUIntrinsic::r600_tex:
676 case AMDGPUIntrinsic::r600_texc:
677 case AMDGPUIntrinsic::r600_txl:
678 case AMDGPUIntrinsic::r600_txlc:
679 case AMDGPUIntrinsic::r600_txb:
680 case AMDGPUIntrinsic::r600_txbc:
681 case AMDGPUIntrinsic::r600_txf:
682 case AMDGPUIntrinsic::r600_txq:
683 case AMDGPUIntrinsic::r600_ddx:
684 case AMDGPUIntrinsic::r600_ddy:
Vincent Lejeune6df39432013-10-02 16:00:33 +0000685 case AMDGPUIntrinsic::R600_ldptr: {
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000686 unsigned TextureOp;
687 switch (IntrinsicID) {
Matt Arsenault59bd3012016-01-22 19:00:09 +0000688 case AMDGPUIntrinsic::r600_tex:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000689 TextureOp = 0;
690 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000691 case AMDGPUIntrinsic::r600_texc:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000692 TextureOp = 1;
693 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000694 case AMDGPUIntrinsic::r600_txl:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000695 TextureOp = 2;
696 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000697 case AMDGPUIntrinsic::r600_txlc:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000698 TextureOp = 3;
699 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000700 case AMDGPUIntrinsic::r600_txb:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000701 TextureOp = 4;
702 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000703 case AMDGPUIntrinsic::r600_txbc:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000704 TextureOp = 5;
705 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000706 case AMDGPUIntrinsic::r600_txf:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000707 TextureOp = 6;
708 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000709 case AMDGPUIntrinsic::r600_txq:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000710 TextureOp = 7;
711 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000712 case AMDGPUIntrinsic::r600_ddx:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000713 TextureOp = 8;
714 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000715 case AMDGPUIntrinsic::r600_ddy:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000716 TextureOp = 9;
717 break;
Vincent Lejeune6df39432013-10-02 16:00:33 +0000718 case AMDGPUIntrinsic::R600_ldptr:
719 TextureOp = 10;
720 break;
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000721 default:
722 llvm_unreachable("Unknow Texture Operation");
723 }
724
725 SDValue TexArgs[19] = {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000726 DAG.getConstant(TextureOp, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000727 Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000728 DAG.getConstant(0, DL, MVT::i32),
729 DAG.getConstant(1, DL, MVT::i32),
730 DAG.getConstant(2, DL, MVT::i32),
731 DAG.getConstant(3, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000732 Op.getOperand(2),
733 Op.getOperand(3),
734 Op.getOperand(4),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000735 DAG.getConstant(0, DL, MVT::i32),
736 DAG.getConstant(1, DL, MVT::i32),
737 DAG.getConstant(2, DL, MVT::i32),
738 DAG.getConstant(3, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000739 Op.getOperand(5),
740 Op.getOperand(6),
741 Op.getOperand(7),
742 Op.getOperand(8),
743 Op.getOperand(9),
744 Op.getOperand(10)
745 };
Craig Topper48d114b2014-04-26 18:35:24 +0000746 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000747 }
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000748 case AMDGPUIntrinsic::AMDGPU_dp4: {
749 SDValue Args[8] = {
750 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000751 DAG.getConstant(0, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000752 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000753 DAG.getConstant(0, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000754 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000755 DAG.getConstant(1, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000756 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000757 DAG.getConstant(1, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000758 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000759 DAG.getConstant(2, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000760 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000761 DAG.getConstant(2, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000762 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000763 DAG.getConstant(3, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000764 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000765 DAG.getConstant(3, DL, MVT::i32))
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000766 };
Craig Topper48d114b2014-04-26 18:35:24 +0000767 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000768 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000769
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000770 case Intrinsic::r600_read_ngroups_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000771 return LowerImplicitParameter(DAG, VT, DL, 0);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000772 case Intrinsic::r600_read_ngroups_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000773 return LowerImplicitParameter(DAG, VT, DL, 1);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000774 case Intrinsic::r600_read_ngroups_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000775 return LowerImplicitParameter(DAG, VT, DL, 2);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000776 case Intrinsic::r600_read_global_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000777 return LowerImplicitParameter(DAG, VT, DL, 3);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000778 case Intrinsic::r600_read_global_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000779 return LowerImplicitParameter(DAG, VT, DL, 4);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000780 case Intrinsic::r600_read_global_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000781 return LowerImplicitParameter(DAG, VT, DL, 5);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000782 case Intrinsic::r600_read_local_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000783 return LowerImplicitParameter(DAG, VT, DL, 6);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000784 case Intrinsic::r600_read_local_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000785 return LowerImplicitParameter(DAG, VT, DL, 7);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000786 case Intrinsic::r600_read_local_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000787 return LowerImplicitParameter(DAG, VT, DL, 8);
788
Matt Arsenaultbef34e22016-01-22 21:30:34 +0000789 case Intrinsic::r600_read_workdim:
790 case AMDGPUIntrinsic::AMDGPU_read_workdim: { // Legacy name.
Tom Stellarddcb9f092015-07-09 21:20:37 +0000791 uint32_t ByteOffset = getImplicitParameterOffset(MFI, GRID_DIM);
792 return LowerImplicitParameter(DAG, VT, DL, ByteOffset / 4);
793 }
Jan Veselye5121f32014-10-14 20:05:26 +0000794
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000795 case Intrinsic::r600_read_tgid_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000796 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
797 AMDGPU::T1_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000798 case Intrinsic::r600_read_tgid_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000799 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
800 AMDGPU::T1_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000801 case Intrinsic::r600_read_tgid_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000802 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
803 AMDGPU::T1_Z, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000804 case Intrinsic::r600_read_tidig_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000805 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
806 AMDGPU::T0_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000807 case Intrinsic::r600_read_tidig_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000808 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
809 AMDGPU::T0_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000810 case Intrinsic::r600_read_tidig_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000811 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
812 AMDGPU::T0_Z, VT);
Matt Arsenaultbef34e22016-01-22 21:30:34 +0000813
814 // FIXME: Should be renamed to r600 prefix
Matt Arsenault0c3e2332016-01-26 04:14:16 +0000815 case AMDGPUIntrinsic::AMDGPU_rsq_clamped:
Matt Arsenault79963e82016-02-13 01:03:00 +0000816 return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
Matt Arsenaultbef34e22016-01-22 21:30:34 +0000817
818 case Intrinsic::r600_rsq:
Matt Arsenault0c3e2332016-01-26 04:14:16 +0000819 case AMDGPUIntrinsic::AMDGPU_rsq: // Legacy name
Matt Arsenault257d48d2014-06-24 22:13:39 +0000820 // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
821 return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000822 }
823 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
824 break;
825 }
826 } // end switch(Op.getOpcode())
827 return SDValue();
828}
829
/// Replace the illegal result types of node \p N with legal equivalents and
/// append the replacement values to \p Results.  Opcodes not handled here are
/// delegated to the generic AMDGPU implementation.
void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
    return;
  case ISD::FP_TO_UINT:
    // An i1 result is lowered to a simple compare against 0.0 (see
    // LowerFPTOUINT); wider results share the FP_TO_SINT expansion below.
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
      return;
    }
    // Fall-through. Since we don't care about out of bounds values
    // we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint
    // considers some extra cases which are not necessary here.
  case ISD::FP_TO_SINT: {
    SDValue Result;
    if (expandFP_TO_SINT(N, Result, DAG))
      Results.push_back(Result);
    return;
  }
  case ISD::SDIVREM: {
    // ISD::SDIVREM produces two results; LowerSDIVREM returns both and we
    // push them in order (quotient, then remainder).
    SDValue Op = SDValue(N, 1);
    SDValue RES = LowerSDIVREM(Op, DAG);
    Results.push_back(RES);
    Results.push_back(RES.getValue(1));
    break;
  }
  case ISD::UDIVREM: {
    // LowerUDIVREM64 appends its replacement values to Results itself.
    SDValue Op = SDValue(N, 0);
    LowerUDIVREM64(Op, DAG, Results);
    break;
  }
  }
}
865
Tom Stellard880a80a2014-06-17 16:53:14 +0000866SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
867 SDValue Vector) const {
868
869 SDLoc DL(Vector);
870 EVT VecVT = Vector.getValueType();
871 EVT EltVT = VecVT.getVectorElementType();
872 SmallVector<SDValue, 8> Args;
873
874 for (unsigned i = 0, e = VecVT.getVectorNumElements();
875 i != e; ++i) {
Mehdi Amini44ede332015-07-09 02:09:04 +0000876 Args.push_back(DAG.getNode(
877 ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
878 DAG.getConstant(i, DL, getVectorIdxTy(DAG.getDataLayout()))));
Tom Stellard880a80a2014-06-17 16:53:14 +0000879 }
880
881 return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
882}
883
884SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
885 SelectionDAG &DAG) const {
886
887 SDLoc DL(Op);
888 SDValue Vector = Op.getOperand(0);
889 SDValue Index = Op.getOperand(1);
890
891 if (isa<ConstantSDNode>(Index) ||
892 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
893 return Op;
894
895 Vector = vectorToVerticalVector(DAG, Vector);
896 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
897 Vector, Index);
898}
899
900SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
901 SelectionDAG &DAG) const {
902 SDLoc DL(Op);
903 SDValue Vector = Op.getOperand(0);
904 SDValue Value = Op.getOperand(1);
905 SDValue Index = Op.getOperand(2);
906
907 if (isa<ConstantSDNode>(Index) ||
908 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
909 return Op;
910
911 Vector = vectorToVerticalVector(DAG, Vector);
912 SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
913 Vector, Value, Index);
914 return vectorToVerticalVector(DAG, Insert);
915}
916
Tom Stellard27233b72016-05-02 18:05:17 +0000917SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
918 SDValue Op,
919 SelectionDAG &DAG) const {
920
921 GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
922 if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
923 return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
924
925 const DataLayout &DL = DAG.getDataLayout();
926 const GlobalValue *GV = GSD->getGlobal();
Tom Stellard27233b72016-05-02 18:05:17 +0000927 MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
928
Jan Veselyf97de002016-05-13 20:39:29 +0000929 SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
930 return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
Tom Stellard27233b72016-05-02 18:05:17 +0000931}
932
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000933SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
934 // On hw >= R700, COS/SIN input must be between -1. and 1.
935 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
936 EVT VT = Op.getValueType();
937 SDValue Arg = Op.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000938 SDLoc DL(Op);
Sanjay Patela2607012015-09-16 16:31:21 +0000939
940 // TODO: Should this propagate fast-math-flags?
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000941 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
942 DAG.getNode(ISD::FADD, DL, VT,
943 DAG.getNode(ISD::FMUL, DL, VT, Arg,
944 DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
945 DAG.getConstantFP(0.5, DL, MVT::f32)));
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000946 unsigned TrigNode;
947 switch (Op.getOpcode()) {
948 case ISD::FCOS:
949 TrigNode = AMDGPUISD::COS_HW;
950 break;
951 case ISD::FSIN:
952 TrigNode = AMDGPUISD::SIN_HW;
953 break;
954 default:
955 llvm_unreachable("Wrong trig opcode");
956 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000957 SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
958 DAG.getNode(ISD::FADD, DL, VT, FractPart,
959 DAG.getConstantFP(-0.5, DL, MVT::f32)));
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000960 if (Gen >= AMDGPUSubtarget::R700)
961 return TrigVal;
962 // On R600 hw, COS/SIN input must be between -Pi and Pi.
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000963 return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
964 DAG.getConstantFP(3.14159265359, DL, MVT::f32));
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000965}
966
Jan Vesely25f36272014-06-18 12:27:13 +0000967SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
968 SDLoc DL(Op);
969 EVT VT = Op.getValueType();
970
971 SDValue Lo = Op.getOperand(0);
972 SDValue Hi = Op.getOperand(1);
973 SDValue Shift = Op.getOperand(2);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000974 SDValue Zero = DAG.getConstant(0, DL, VT);
975 SDValue One = DAG.getConstant(1, DL, VT);
Jan Vesely25f36272014-06-18 12:27:13 +0000976
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000977 SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
978 SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
Jan Vesely25f36272014-06-18 12:27:13 +0000979 SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
980 SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
981
982 // The dance around Width1 is necessary for 0 special case.
983 // Without it the CompShift might be 32, producing incorrect results in
984 // Overflow. So we do the shift in two steps, the alternative is to
985 // add a conditional to filter the special case.
986
987 SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
988 Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);
989
990 SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
991 HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
992 SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);
993
994 SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
995 SDValue LoBig = Zero;
996
997 Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
998 Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
999
1000 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
1001}
1002
/// Expand a 64-bit shift-right (SRL_PARTS or SRA_PARTS) into operations on
/// the two 32-bit halves.  The SRA flag below selects arithmetic (sign
/// filling) versus logical (zero filling) behavior for the high half.
SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);

  const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;

  SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  // BigShift: remaining amount once the shift crosses a full 32-bit half.
  // CompShift: complementary amount used to capture the bits that cross
  // between the halves.
  SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps, the alternative is to
  // add a conditional to filter the special case.

  // Bits of Hi that cross over into Lo when the shift amount is < 32.
  SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
  Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);

  // Results for shift amounts < 32.
  SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
  SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
  LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);

  // Results for shift amounts >= 32.  For SRA the high half fills with the
  // sign (Hi >> 31); for SRL it becomes zero.
  SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
  SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;

  // Select between the small-shift and big-shift results at runtime.
  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
}
1040
Jan Vesely808fff52015-04-30 17:15:56 +00001041SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
1042 unsigned mainop, unsigned ovf) const {
1043 SDLoc DL(Op);
1044 EVT VT = Op.getValueType();
1045
1046 SDValue Lo = Op.getOperand(0);
1047 SDValue Hi = Op.getOperand(1);
1048
1049 SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
1050 // Extend sign.
1051 OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
1052 DAG.getValueType(MVT::i1));
1053
1054 SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);
1055
1056 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
1057}
1058
Tom Stellard75aadc22012-12-11 21:25:42 +00001059SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001060 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001061 return DAG.getNode(
1062 ISD::SETCC,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001063 DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001064 MVT::i1,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001065 Op, DAG.getConstantFP(0.0f, DL, MVT::f32),
Tom Stellard75aadc22012-12-11 21:25:42 +00001066 DAG.getCondCode(ISD::SETNE)
1067 );
1068}
1069
Tom Stellard75aadc22012-12-11 21:25:42 +00001070SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001071 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001072 unsigned DwordOffset) const {
1073 unsigned ByteOffset = DwordOffset * 4;
1074 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +00001075 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +00001076
1077 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
1078 assert(isInt<16>(ByteOffset));
1079
1080 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001081 DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
Tom Stellard75aadc22012-12-11 21:25:42 +00001082 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
1083 false, false, false, 0);
1084}
1085
Tom Stellard75aadc22012-12-11 21:25:42 +00001086bool R600TargetLowering::isZero(SDValue Op) const {
1087 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
1088 return Cst->isNullValue();
1089 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
1090 return CstFP->isZero();
1091 } else {
1092 return false;
1093 }
1094}
1095
Matt Arsenault6b6a2c32016-03-11 08:00:27 +00001096bool R600TargetLowering::isHWTrueValue(SDValue Op) const {
1097 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
1098 return CFP->isExactlyValue(1.0);
1099 }
1100 return isAllOnesConstant(Op);
1101}
1102
1103bool R600TargetLowering::isHWFalseValue(SDValue Op) const {
1104 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
1105 return CFP->getValueAPF().isZero();
1106 }
1107 return isNullConstant(Op);
1108}
1109
/// Lower SELECT_CC to something the R600 instruction set can match: first try
/// a min/max combine, then a SET* pattern, then a CND* pattern against zero,
/// and finally fall back to a pair of SELECT_CC nodes.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // FP selects may fold to a legacy min/max node.
  if (VT == MVT::f32) {
    DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
    SDValue MinMax = CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
    if (MinMax)
      return MinMax;
  }

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
     ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  // If True/False are reversed relative to the hardware pattern, invert the
  // condition (or invert-and-swap the compare operands) so they line up.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        // Inverting the condition also swaps which value is selected.
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    // Rewrite the "not equal" family as the inverted "equal" compare with the
    // select values exchanged.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, DL, CompareVT);
    HWFalse = DAG.getConstant(0, DL, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
1251
Alp Tokercb402912014-01-24 17:20:08 +00001252/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001253/// convert these pointers to a register index. Each register holds
1254/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
1255/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
1256/// for indirect addressing.
1257SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1258 unsigned StackWidth,
1259 SelectionDAG &DAG) const {
1260 unsigned SRLPad;
1261 switch(StackWidth) {
1262 case 1:
1263 SRLPad = 2;
1264 break;
1265 case 2:
1266 SRLPad = 3;
1267 break;
1268 case 4:
1269 SRLPad = 4;
1270 break;
1271 default: llvm_unreachable("Invalid stack width");
1272 }
1273
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001274 SDLoc DL(Ptr);
1275 return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
1276 DAG.getConstant(SRLPad, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001277}
1278
/// For element \p ElemIdx of a stack value, compute which sub-register
/// channel it occupies (\p Channel) and whether the register pointer must be
/// advanced (\p PtrIncr), based on how many of the 4 sub-registers each slot
/// uses (\p StackWidth).
void R600TargetLowering::getStackAddress(unsigned StackWidth,
                                         unsigned ElemIdx,
                                         unsigned &Channel,
                                         unsigned &PtrIncr) const {
  switch (StackWidth) {
  default:
  case 1:
    // One element per register: always channel 0; any element past the first
    // lives in a later register.
    Channel = 0;
    if (ElemIdx > 0) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 2:
    // Two elements per register: even indices in channel 0, odd in channel 1.
    Channel = ElemIdx % 2;
    // NOTE(review): only ElemIdx == 2 sets PtrIncr here, so ElemIdx == 3
    // would get PtrIncr = 0 -- presumably indices beyond 3 (or beyond the
    // first crossing) never reach this path; confirm against callers.
    if (ElemIdx == 2) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 4:
    // Four elements per register: the element index is the channel directly.
    Channel = ElemIdx;
    PtrIncr = 0;
    break;
  }
}
1307
/// Lower a truncating (i8 / i16) store to private memory.  The store is
/// expanded into a read-modify-write of the 32-bit word containing the value:
/// load the word, clear the destination bits, OR in the shifted value, and
/// write the word back.
SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Store);

  // Bit mask covering the width of the stored value.
  unsigned Mask = 0;
  if (Store->getMemoryVT() == MVT::i8) {
    Mask = 0xff;
  } else if (Store->getMemoryVT() == MVT::i16) {
    Mask = 0xffff;
  }

  SDValue Chain = Store->getChain();
  SDValue BasePtr = Store->getBasePtr();
  EVT MemVT = Store->getMemoryVT();

  // Dword index of the containing 32-bit word (byte address / 4).
  SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, BasePtr,
                            DAG.getConstant(2, DL, MVT::i32));
  // Read the current contents of that word.
  SDValue Dst = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
                            Chain, Ptr,
                            DAG.getTargetConstant(0, DL, MVT::i32));

  // Byte offset of the value within the word...
  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, BasePtr,
                                DAG.getConstant(0x3, DL, MVT::i32));

  // ...converted to a bit shift (byte offset * 8).
  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
                                 DAG.getConstant(3, DL, MVT::i32));

  SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
                                  Store->getValue());

  // Keep only the low MemVT bits of the value.
  SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);

  // Move the value into its position within the word.
  SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
                                     MaskedValue, ShiftAmt);

  // Build the inverse mask (~(Mask << ShiftAmt)) and clear the destination
  // bits the new value will occupy.
  SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32,
                                DAG.getConstant(Mask, DL, MVT::i32),
                                ShiftAmt);
  DstMask = DAG.getNode(ISD::XOR, DL, MVT::i32, DstMask,
                        DAG.getConstant(0xffffffff, DL, MVT::i32));
  Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);

  // Merge the new bits in and store the whole word back.
  SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
  return DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
                     Chain, Value, Ptr,
                     DAG.getTargetConstant(0, DL, MVT::i32));
}
1355
/// Custom lowering for ISD::STORE on R600.
///
/// - Vector stores to LOCAL/PRIVATE memory are split into scalar stores.
/// - Global truncating stores of i8/i16 become a masked-OR store
///   (AMDGPUISD::STORE_MSKOR) on the containing dword.
/// - Other global stores have their pointer rewritten from a byte address to
///   a dword address (AMDGPUISD::DWORDADDR).
/// - Private sub-dword stores go through lowerPrivateTruncStore; remaining
///   private stores are expanded to per-channel REGISTER_STORE nodes.
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  // Give the generic AMDGPU lowering a chance to merge the store first.
  if (SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG))
    return Result;

  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  unsigned AS = StoreNode->getAddressSpace();
  SDValue Value = StoreNode->getValue();
  EVT ValueVT = Value.getValueType();

  // Vector stores to local/private memory are handled one element at a time.
  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
      ValueVT.isVector()) {
    return SplitVectorStore(Op, DAG);
  }

  SDLoc DL(Op);
  SDValue Chain = StoreNode->getChain();
  SDValue Ptr = StoreNode->getBasePtr();

  if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
    if (StoreNode->isTruncatingStore()) {
      // Emulate an i8/i16 store with a masked OR into the containing dword:
      // build (value << bit_offset) and (mask << bit_offset) and hand both to
      // the STORE_MSKOR node.
      EVT VT = Value.getValueType();
      assert(VT.bitsLE(MVT::i32));
      EVT MemVT = StoreNode->getMemoryVT();
      SDValue MaskConstant;
      if (MemVT == MVT::i8) {
        MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
      } else {
        assert(MemVT == MVT::i16);
        MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
      }
      // Dword index of the store target (byte address / 4).
      SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
                                      DAG.getConstant(2, DL, MVT::i32));
      // Byte position within that dword (byte address & 3).
      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
                                      DAG.getConstant(0x00000003, DL, VT));
      SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
      // Bit offset of the target byte/short (byte index * 8).
      SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
                                  DAG.getConstant(3, DL, VT));
      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
      // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
      // vector instead.
      SDValue Src[4] = {
        ShiftedValue,
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(0, DL, MVT::i32),
        Mask
      };
      SDValue Input = DAG.getBuildVector(MVT::v4i32, DL, Src);
      SDValue Args[3] = { Chain, Input, DWordAddr };
      return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
                                     Op->getVTList(), Args, MemVT,
                                     StoreNode->getMemOperand());
    } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
               ValueVT.bitsGE(MVT::i32)) {
      // Convert pointer from byte address to dword address.
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
                        DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
                                    Ptr, DAG.getConstant(2, DL, MVT::i32)));

      if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
        llvm_unreachable("Truncated and indexed stores not supported yet");
      } else {
        Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
      }
      return Chain;
    }
  }

  // Everything below handles private (scratch) memory only.
  if (AS != AMDGPUAS::PRIVATE_ADDRESS)
    return SDValue();

  EVT MemVT = StoreNode->getMemoryVT();
  // Sub-dword private stores need a read-modify-write sequence.
  if (MemVT.bitsLT(MVT::i32))
    return lowerPrivateTruncStore(StoreNode, DAG);

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL =
      static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (ValueVT.isVector()) {
    // Store each element as a REGISTER_STORE on its own channel, then tie the
    // chains together with a TokenFactor.
    unsigned NumElemVT = ValueVT.getVectorNumElements();
    EVT ElemVT = ValueVT.getVectorElementType();
    SmallVector<SDValue, 4> Stores(NumElemVT);

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, DL, MVT::i32));
      SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
                                 Value, DAG.getConstant(i, DL, MVT::i32));

      Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
                              Chain, Elem, Ptr,
                              DAG.getTargetConstant(Channel, DL, MVT::i32));
    }
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
  } else {
    if (ValueVT == MVT::i8) {
      Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
    }
    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
                        DAG.getTargetConstant(0, DL, MVT::i32)); // Channel
  }

  return Chain;
}
1470
Tom Stellard365366f2013-01-23 02:09:06 +00001471// return (512 + (kc_bank << 12)
1472static int
1473ConstantAddressBlock(unsigned AddressSpace) {
1474 switch (AddressSpace) {
1475 case AMDGPUAS::CONSTANT_BUFFER_0:
1476 return 512;
1477 case AMDGPUAS::CONSTANT_BUFFER_1:
1478 return 512 + 4096;
1479 case AMDGPUAS::CONSTANT_BUFFER_2:
1480 return 512 + 4096 * 2;
1481 case AMDGPUAS::CONSTANT_BUFFER_3:
1482 return 512 + 4096 * 3;
1483 case AMDGPUAS::CONSTANT_BUFFER_4:
1484 return 512 + 4096 * 4;
1485 case AMDGPUAS::CONSTANT_BUFFER_5:
1486 return 512 + 4096 * 5;
1487 case AMDGPUAS::CONSTANT_BUFFER_6:
1488 return 512 + 4096 * 6;
1489 case AMDGPUAS::CONSTANT_BUFFER_7:
1490 return 512 + 4096 * 7;
1491 case AMDGPUAS::CONSTANT_BUFFER_8:
1492 return 512 + 4096 * 8;
1493 case AMDGPUAS::CONSTANT_BUFFER_9:
1494 return 512 + 4096 * 9;
1495 case AMDGPUAS::CONSTANT_BUFFER_10:
1496 return 512 + 4096 * 10;
1497 case AMDGPUAS::CONSTANT_BUFFER_11:
1498 return 512 + 4096 * 11;
1499 case AMDGPUAS::CONSTANT_BUFFER_12:
1500 return 512 + 4096 * 12;
1501 case AMDGPUAS::CONSTANT_BUFFER_13:
1502 return 512 + 4096 * 13;
1503 case AMDGPUAS::CONSTANT_BUFFER_14:
1504 return 512 + 4096 * 14;
1505 case AMDGPUAS::CONSTANT_BUFFER_15:
1506 return 512 + 4096 * 15;
1507 default:
1508 return -1;
1509 }
1510}
1511
Matt Arsenault6dfda962016-02-10 18:21:39 +00001512SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
1513 SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001514 SDLoc DL(Op);
Matt Arsenault6dfda962016-02-10 18:21:39 +00001515 LoadSDNode *Load = cast<LoadSDNode>(Op);
1516 ISD::LoadExtType ExtType = Load->getExtensionType();
1517 EVT MemVT = Load->getMemoryVT();
Tom Stellard365366f2013-01-23 02:09:06 +00001518
Matt Arsenault6dfda962016-02-10 18:21:39 +00001519 // <SI && AS=PRIVATE && EXTLOAD && size < 32bit,
1520 // register (2-)byte extract.
1521
1522 // Get Register holding the target.
1523 SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Load->getBasePtr(),
1524 DAG.getConstant(2, DL, MVT::i32));
1525 // Load the Register.
1526 SDValue Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(),
1527 Load->getChain(),
1528 Ptr,
1529 DAG.getTargetConstant(0, DL, MVT::i32),
1530 Op.getOperand(2));
1531
1532 // Get offset within the register.
1533 SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
1534 Load->getBasePtr(),
1535 DAG.getConstant(0x3, DL, MVT::i32));
1536
1537 // Bit offset of target byte (byteIdx * 8).
1538 SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1539 DAG.getConstant(3, DL, MVT::i32));
1540
1541 // Shift to the right.
1542 Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Ret, ShiftAmt);
1543
1544 // Eliminate the upper bits by setting them to ...
1545 EVT MemEltVT = MemVT.getScalarType();
1546
1547 // ... ones.
1548 if (ExtType == ISD::SEXTLOAD) {
1549 SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
1550
1551 SDValue Ops[] = {
1552 DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode),
1553 Load->getChain()
1554 };
1555
1556 return DAG.getMergeValues(Ops, DL);
1557 }
1558
1559 // ... or zeros.
1560 SDValue Ops[] = {
1561 DAG.getZeroExtendInReg(Ret, DL, MemEltVT),
1562 Load->getChain()
1563 };
1564
1565 return DAG.getMergeValues(Ops, DL);
1566}
1567
/// Custom lowering for ISD::LOAD on R600.
///
/// - Sub-dword extending loads from private memory are emulated via
///   lowerPrivateExtLoad.
/// - Vector loads from local memory are scalarized.
/// - Loads from a constant buffer become CONST_ADDRESS nodes; a constant
///   pointer is folded into absolute kcache positions.
/// - SEXT loads are expanded to EXTLOAD + SIGN_EXTEND_INREG.
/// - Remaining private loads are expanded to per-channel REGISTER_LOAD nodes
///   (indirect addressing).
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  unsigned AS = LoadNode->getAddressSpace();
  EVT MemVT = LoadNode->getMemoryVT();
  ISD::LoadExtType ExtType = LoadNode->getExtensionType();

  if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
      ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
    return lowerPrivateExtLoad(Op, DAG);
  }

  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue Chain = LoadNode->getChain();
  SDValue Ptr = LoadNode->getBasePtr();

  // Local-memory vector loads are expanded into per-element loads.
  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      scalarizeVectorLoad(LoadNode, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Constant-buffer loads. ZEXT loads are allowed here because the constant
  // data is zero-extended on upload (see the SEXT comment further down).
  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
        isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getBuildVector(NewVT, DL, makeArrayRef(Slots, NumElements));
    } else {
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
                      DAG.getConstant(4, DL, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
          );
    }

    // Scalar loads only need element 0 of the CONST_ADDRESS result.
    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, DL, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  SDValue LoweredLoad;

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                     LoadNode->getPointerInfo(), MemVT,
                                     LoadNode->isVolatile(),
                                     LoadNode->isNonTemporal(),
                                     LoadNode->isInvariant(),
                                     LoadNode->getAlignment());
    SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
                              DAG.getValueType(MemVT));

    SDValue MergedValues[2] = { Res, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL =
      static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    // Load each element with its own REGISTER_LOAD on the proper channel,
    // then rebuild the vector.
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT <= 4);
    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, DL, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, DL, MVT::i32),
                             Op.getOperand(2));
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElemVT);
    LoweredLoad = DAG.getBuildVector(TargetVT, DL, makeArrayRef(Loads, NumElemVT));
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, DL, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2] = {
    LoweredLoad,
    Chain
  };

  return DAG.getMergeValues(Ops, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001712
Matt Arsenault1d555c42014-06-23 18:00:55 +00001713SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1714 SDValue Chain = Op.getOperand(0);
1715 SDValue Cond = Op.getOperand(1);
1716 SDValue Jump = Op.getOperand(2);
1717
1718 return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1719 Chain, Jump, Cond);
1720}
1721
Matt Arsenault81d06012016-03-07 21:10:13 +00001722SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
1723 SelectionDAG &DAG) const {
1724 MachineFunction &MF = DAG.getMachineFunction();
1725 const AMDGPUFrameLowering *TFL = Subtarget->getFrameLowering();
1726
1727 FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
1728
1729 unsigned FrameIndex = FIN->getIndex();
1730 unsigned IgnoredFrameReg;
1731 unsigned Offset =
1732 TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
1733 return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), SDLoc(Op),
1734 Op.getValueType());
1735}
1736
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
///
/// Shader arguments arrive pre-loaded in registers; kernel arguments are
/// loaded from the input constant buffer (CONSTANT_BUFFER_0), which begins
/// with a 36-byte block of thread-group/global-size information.
SDValue R600TargetLowering::LowerFormalArguments(
    SDValue Chain,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    SDLoc DL, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();

  SmallVector<ISD::InputArg, 8> LocalIns;

  // Recover the pre-legalization argument list before assigning locations.
  getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);

  AnalyzeFormalArguments(CCInfo, LocalIns);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    const ISD::InputArg &In = Ins[i];
    EVT VT = In.VT;
    EVT MemVT = VA.getLocVT();
    if (!VT.isVector() && MemVT.isVector()) {
      // Get load source type if scalarized.
      MemVT = MemVT.getVectorElementType();
    }

    // Shader calling conventions: arguments are already live in registers.
    if (AMDGPU::isShader(CallConv)) {
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    // Kernel arguments live in constant buffer 0.
    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
    // being invalid. Somehow this seems to work with i64 arguments, but breaks
    // for <1 x i64>.

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.
    ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
    if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
      // FIXME: This should really check the extload type, but the handling of
      // extload vector parameters seems to be broken.

      // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
      Ext = ISD::SEXTLOAD;
    }

    // Compute the offset from the value.
    // XXX - I think PartOffset should give you this, but it seems to give the
    // size of the register which isn't useful.

    unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
    unsigned PartOffset = VA.getLocMemOffset();
    // Skip the 36-byte header described above.
    unsigned Offset = 36 + VA.getLocMemOffset();

    MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
    SDValue Arg = DAG.getLoad(ISD::UNINDEXED, Ext, VT, DL, Chain,
                              DAG.getConstant(Offset, DL, MVT::i32),
                              DAG.getUNDEF(MVT::i32),
                              PtrInfo,
                              MemVT, false, true, true, 4);

    // 4 is the preferred alignment for the CONSTANT memory space.
    InVals.push_back(Arg);
    // Record where the next ABI argument would start.
    MFI->ABIArgOffset = Offset + MemVT.getStoreSize();
  }
  return Chain;
}
1816
Mehdi Amini44ede332015-07-09 02:09:04 +00001817EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1818 EVT VT) const {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001819 if (!VT.isVector())
1820 return MVT::i32;
Tom Stellard75aadc22012-12-11 21:25:42 +00001821 return VT.changeVectorElementTypeToInteger();
1822}
1823
Matt Arsenaultfa67bdb2016-02-22 21:04:16 +00001824bool R600TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
1825 unsigned AddrSpace,
1826 unsigned Align,
1827 bool *IsFast) const {
1828 if (IsFast)
1829 *IsFast = false;
1830
1831 if (!VT.isSimple() || VT == MVT::Other)
1832 return false;
1833
1834 if (VT.bitsLT(MVT::i32))
1835 return false;
1836
1837 // TODO: This is a rough estimate.
1838 if (IsFast)
1839 *IsFast = true;
1840
1841 return VT.bitsGT(MVT::i32) && Align % 4 == 0;
1842}
1843
/// Canonicalize the operands of a four-element BUILD_VECTOR for swizzling.
///
/// Undef elements, constant 0.0/1.0 elements and duplicates of an earlier
/// element are replaced with undef, and the replacement is recorded in
/// \p RemapSwizzle as element-index -> swizzle selector (7 = SEL_MASK_WRITE,
/// 4 = SEL_0, 5 = SEL_1, or the index of the duplicated element).
static SDValue CompactSwizzlableVector(
  SelectionDAG &DAG, SDValue VectorEntry,
  DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].isUndef())
      // We mask write here to teach later passes that the ith element of this
      // vector is undef. Thus we can use it to reduce 128 bits reg usage,
      // break false dependencies and additionnaly make assembly easier to read.
      RemapSwizzle[i] = 7; // SEL_MASK_WRITE
    // Constant 0.0/1.0 lanes can be sourced from the SEL_0/SEL_1 selectors
    // instead of a real register read.
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    if (NewBldVec[i].isUndef())
      continue;
    // A lane equal to an earlier lane is redirected to that lane's selector.
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
                            NewBldVec);
}
1886
Benjamin Kramer193960c2013-06-11 13:32:25 +00001887static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1888 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001889 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1890 assert(RemapSwizzle.empty());
1891 SDValue NewBldVec[4] = {
1892 VectorEntry.getOperand(0),
1893 VectorEntry.getOperand(1),
1894 VectorEntry.getOperand(2),
1895 VectorEntry.getOperand(3)
1896 };
1897 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001898 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001899 RemapSwizzle[i] = i;
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001900 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1901 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1902 ->getZExtValue();
1903 if (i == Idx)
1904 isUnmovable[Idx] = true;
1905 }
1906 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001907
1908 for (unsigned i = 0; i < 4; i++) {
1909 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1910 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1911 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001912 if (isUnmovable[Idx])
1913 continue;
1914 // Swap i and Idx
1915 std::swap(NewBldVec[Idx], NewBldVec[i]);
1916 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1917 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001918 }
1919 }
1920
Ahmed Bougacha128f8732016-04-26 21:15:30 +00001921 return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
1922 NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001923}
1924
1925
1926SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001927 SDValue Swz[4], SelectionDAG &DAG,
1928 SDLoc DL) const {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001929 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1930 // Old -> New swizzle values
1931 DenseMap<unsigned, unsigned> SwizzleRemap;
1932
1933 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1934 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001935 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001936 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001937 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001938 }
1939
1940 SwizzleRemap.clear();
1941 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1942 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001943 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001944 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001945 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001946 }
1947
1948 return BuildVector;
1949}
1950
1951
Tom Stellard75aadc22012-12-11 21:25:42 +00001952//===----------------------------------------------------------------------===//
1953// Custom DAG Optimizations
1954//===----------------------------------------------------------------------===//
1955
1956SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1957 DAGCombinerInfo &DCI) const {
1958 SelectionDAG &DAG = DCI.DAG;
1959
1960 switch (N->getOpcode()) {
Tom Stellard50122a52014-04-07 19:45:41 +00001961 default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00001962 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1963 case ISD::FP_ROUND: {
1964 SDValue Arg = N->getOperand(0);
1965 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001966 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001967 Arg.getOperand(0));
1968 }
1969 break;
1970 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001971
1972 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1973 // (i32 select_cc f32, f32, -1, 0 cc)
1974 //
1975 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1976 // this to one of the SET*_DX10 instructions.
1977 case ISD::FP_TO_SINT: {
1978 SDValue FNeg = N->getOperand(0);
1979 if (FNeg.getOpcode() != ISD::FNEG) {
1980 return SDValue();
1981 }
1982 SDValue SelectCC = FNeg.getOperand(0);
1983 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1984 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1985 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1986 !isHWTrueValue(SelectCC.getOperand(2)) ||
1987 !isHWFalseValue(SelectCC.getOperand(3))) {
1988 return SDValue();
1989 }
1990
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001991 SDLoc dl(N);
1992 return DAG.getNode(ISD::SELECT_CC, dl, N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001993 SelectCC.getOperand(0), // LHS
1994 SelectCC.getOperand(1), // RHS
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001995 DAG.getConstant(-1, dl, MVT::i32), // True
1996 DAG.getConstant(0, dl, MVT::i32), // False
Tom Stellarde06163a2013-02-07 14:02:35 +00001997 SelectCC.getOperand(4)); // CC
1998
1999 break;
2000 }
Quentin Colombete2e05482013-07-30 00:27:16 +00002001
NAKAMURA Takumi8a046432013-10-28 04:07:38 +00002002 // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
2003 // => build_vector elt0, ... , NewEltIdx, ... , eltN
Quentin Colombete2e05482013-07-30 00:27:16 +00002004 case ISD::INSERT_VECTOR_ELT: {
2005 SDValue InVec = N->getOperand(0);
2006 SDValue InVal = N->getOperand(1);
2007 SDValue EltNo = N->getOperand(2);
2008 SDLoc dl(N);
2009
2010 // If the inserted element is an UNDEF, just use the input vector.
Sanjay Patel57195842016-03-14 17:28:46 +00002011 if (InVal.isUndef())
Quentin Colombete2e05482013-07-30 00:27:16 +00002012 return InVec;
2013
2014 EVT VT = InVec.getValueType();
2015
2016 // If we can't generate a legal BUILD_VECTOR, exit
2017 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
2018 return SDValue();
2019
2020 // Check that we know which element is being inserted
2021 if (!isa<ConstantSDNode>(EltNo))
2022 return SDValue();
2023 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
2024
2025 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
2026 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
2027 // vector elements.
2028 SmallVector<SDValue, 8> Ops;
2029 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
2030 Ops.append(InVec.getNode()->op_begin(),
2031 InVec.getNode()->op_end());
Sanjay Patel57195842016-03-14 17:28:46 +00002032 } else if (InVec.isUndef()) {
Quentin Colombete2e05482013-07-30 00:27:16 +00002033 unsigned NElts = VT.getVectorNumElements();
2034 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
2035 } else {
2036 return SDValue();
2037 }
2038
2039 // Insert the element
2040 if (Elt < Ops.size()) {
2041 // All the operands of BUILD_VECTOR must have the same type;
2042 // we enforce that here.
2043 EVT OpVT = Ops[0].getValueType();
2044 if (InVal.getValueType() != OpVT)
2045 InVal = OpVT.bitsGT(InVal.getValueType()) ?
2046 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
2047 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
2048 Ops[Elt] = InVal;
2049 }
2050
2051 // Return the new vector
Ahmed Bougacha128f8732016-04-26 21:15:30 +00002052 return DAG.getBuildVector(VT, dl, Ops);
Quentin Colombete2e05482013-07-30 00:27:16 +00002053 }
2054
Tom Stellard365366f2013-01-23 02:09:06 +00002055 // Extract_vec (Build_vector) generated by custom lowering
2056 // also needs to be customly combined
2057 case ISD::EXTRACT_VECTOR_ELT: {
2058 SDValue Arg = N->getOperand(0);
2059 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
2060 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2061 unsigned Element = Const->getZExtValue();
2062 return Arg->getOperand(Element);
2063 }
2064 }
Tom Stellarddd04c832013-01-31 22:11:53 +00002065 if (Arg.getOpcode() == ISD::BITCAST &&
2066 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
2067 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2068 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00002069 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00002070 Arg->getOperand(0).getOperand(Element));
2071 }
2072 }
Mehdi Aminie029eae2015-07-16 06:23:12 +00002073 break;
Tom Stellard365366f2013-01-23 02:09:06 +00002074 }
Tom Stellarde06163a2013-02-07 14:02:35 +00002075
2076 case ISD::SELECT_CC: {
Tom Stellardafa8b532014-05-09 16:42:16 +00002077 // Try common optimizations
Ahmed Bougachaf8dfb472016-02-09 22:54:12 +00002078 if (SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI))
Tom Stellardafa8b532014-05-09 16:42:16 +00002079 return Ret;
2080
Tom Stellarde06163a2013-02-07 14:02:35 +00002081 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
2082 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00002083 //
2084 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
2085 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00002086 SDValue LHS = N->getOperand(0);
2087 if (LHS.getOpcode() != ISD::SELECT_CC) {
2088 return SDValue();
2089 }
2090
2091 SDValue RHS = N->getOperand(1);
2092 SDValue True = N->getOperand(2);
2093 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00002094 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00002095
2096 if (LHS.getOperand(2).getNode() != True.getNode() ||
2097 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00002098 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00002099 return SDValue();
2100 }
2101
Tom Stellard5e524892013-03-08 15:37:11 +00002102 switch (NCC) {
2103 default: return SDValue();
2104 case ISD::SETNE: return LHS;
2105 case ISD::SETEQ: {
2106 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
2107 LHSCC = ISD::getSetCCInverse(LHSCC,
2108 LHS.getOperand(0).getValueType().isInteger());
Tom Stellardcd428182013-09-28 02:50:38 +00002109 if (DCI.isBeforeLegalizeOps() ||
2110 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
2111 return DAG.getSelectCC(SDLoc(N),
2112 LHS.getOperand(0),
2113 LHS.getOperand(1),
2114 LHS.getOperand(2),
2115 LHS.getOperand(3),
2116 LHSCC);
2117 break;
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002118 }
Tom Stellard5e524892013-03-08 15:37:11 +00002119 }
Tom Stellardcd428182013-09-28 02:50:38 +00002120 return SDValue();
Tom Stellard5e524892013-03-08 15:37:11 +00002121 }
Tom Stellardfbab8272013-08-16 01:12:11 +00002122
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002123 case AMDGPUISD::EXPORT: {
2124 SDValue Arg = N->getOperand(1);
2125 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2126 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002127
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002128 SDValue NewArgs[8] = {
2129 N->getOperand(0), // Chain
2130 SDValue(),
2131 N->getOperand(2), // ArrayBase
2132 N->getOperand(3), // Type
2133 N->getOperand(4), // SWZ_X
2134 N->getOperand(5), // SWZ_Y
2135 N->getOperand(6), // SWZ_Z
2136 N->getOperand(7) // SWZ_W
2137 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00002138 SDLoc DL(N);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002139 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
Craig Topper48d114b2014-04-26 18:35:24 +00002140 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
Tom Stellarde06163a2013-02-07 14:02:35 +00002141 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002142 case AMDGPUISD::TEXTURE_FETCH: {
2143 SDValue Arg = N->getOperand(1);
2144 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2145 break;
2146
2147 SDValue NewArgs[19] = {
2148 N->getOperand(0),
2149 N->getOperand(1),
2150 N->getOperand(2),
2151 N->getOperand(3),
2152 N->getOperand(4),
2153 N->getOperand(5),
2154 N->getOperand(6),
2155 N->getOperand(7),
2156 N->getOperand(8),
2157 N->getOperand(9),
2158 N->getOperand(10),
2159 N->getOperand(11),
2160 N->getOperand(12),
2161 N->getOperand(13),
2162 N->getOperand(14),
2163 N->getOperand(15),
2164 N->getOperand(16),
2165 N->getOperand(17),
2166 N->getOperand(18),
2167 };
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002168 SDLoc DL(N);
2169 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
2170 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002171 }
Tom Stellard75aadc22012-12-11 21:25:42 +00002172 }
Matt Arsenault5565f65e2014-05-22 18:09:07 +00002173
2174 return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00002175}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002176
2177static bool
2178FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002179 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Eric Christopherfc6de422014-08-05 02:39:49 +00002180 const R600InstrInfo *TII =
2181 static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002182 if (!Src.isMachineOpcode())
2183 return false;
2184 switch (Src.getMachineOpcode()) {
2185 case AMDGPU::FNEG_R600:
2186 if (!Neg.getNode())
2187 return false;
2188 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002189 Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002190 return true;
2191 case AMDGPU::FABS_R600:
2192 if (!Abs.getNode())
2193 return false;
2194 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002195 Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002196 return true;
2197 case AMDGPU::CONST_COPY: {
2198 unsigned Opcode = ParentNode->getMachineOpcode();
2199 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2200
2201 if (!Sel.getNode())
2202 return false;
2203
2204 SDValue CstOffset = Src.getOperand(0);
2205 if (ParentNode->getValueType(0).isVector())
2206 return false;
2207
2208 // Gather constants values
2209 int SrcIndices[] = {
2210 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2211 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2212 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
2213 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2214 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2215 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2216 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2217 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2218 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2219 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2220 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
2221 };
2222 std::vector<unsigned> Consts;
Matt Arsenault4d64f962014-05-12 19:23:21 +00002223 for (int OtherSrcIdx : SrcIndices) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002224 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2225 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2226 continue;
2227 if (HasDst) {
2228 OtherSrcIdx--;
2229 OtherSelIdx--;
2230 }
2231 if (RegisterSDNode *Reg =
2232 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2233 if (Reg->getReg() == AMDGPU::ALU_CONST) {
Matt Arsenaultb3ee3882014-05-12 19:26:38 +00002234 ConstantSDNode *Cst
2235 = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002236 Consts.push_back(Cst->getZExtValue());
2237 }
2238 }
2239 }
2240
Matt Arsenault37c12d72014-05-12 20:42:57 +00002241 ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002242 Consts.push_back(Cst->getZExtValue());
2243 if (!TII->fitsConstReadLimitations(Consts)) {
2244 return false;
2245 }
2246
2247 Sel = CstOffset;
2248 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
2249 return true;
2250 }
Jan Vesely16800392016-05-13 20:39:31 +00002251 case AMDGPU::MOV_IMM_GLOBAL_ADDR:
2252 // Check if the Imm slot is used. Taken from below.
2253 if (cast<ConstantSDNode>(Imm)->getZExtValue())
2254 return false;
2255 Imm = Src.getOperand(0);
2256 Src = DAG.getRegister(AMDGPU::ALU_LITERAL_X, MVT::i32);
2257 return true;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002258 case AMDGPU::MOV_IMM_I32:
2259 case AMDGPU::MOV_IMM_F32: {
2260 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
2261 uint64_t ImmValue = 0;
2262
2263
2264 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
2265 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
2266 float FloatValue = FPC->getValueAPF().convertToFloat();
2267 if (FloatValue == 0.0) {
2268 ImmReg = AMDGPU::ZERO;
2269 } else if (FloatValue == 0.5) {
2270 ImmReg = AMDGPU::HALF;
2271 } else if (FloatValue == 1.0) {
2272 ImmReg = AMDGPU::ONE;
2273 } else {
2274 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2275 }
2276 } else {
2277 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
2278 uint64_t Value = C->getZExtValue();
2279 if (Value == 0) {
2280 ImmReg = AMDGPU::ZERO;
2281 } else if (Value == 1) {
2282 ImmReg = AMDGPU::ONE_INT;
2283 } else {
2284 ImmValue = Value;
2285 }
2286 }
2287
2288 // Check that we aren't already using an immediate.
2289 // XXX: It's possible for an instruction to have more than one
2290 // immediate operand, but this is not supported yet.
2291 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
2292 if (!Imm.getNode())
2293 return false;
2294 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
2295 assert(C);
2296 if (C->getZExtValue())
2297 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002298 Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002299 }
2300 Src = DAG.getRegister(ImmReg, MVT::i32);
2301 return true;
2302 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002303 default:
2304 return false;
2305 }
2306}
2307
2308
/// \brief Fold the instructions after selecting them
///
/// Post-isel peephole over a selected machine node: walks the node's source
/// operands and lets FoldOperand() absorb modifier / constant / literal
/// producers (FNEG_R600, FABS_R600, CONST_COPY, MOV_IMM_*) directly into the
/// instruction's neg/abs/sel/literal operand slots. On the first successful
/// fold the machine node is rebuilt with the updated operand list; otherwise
/// \p Node is returned unchanged.
SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
                                            SelectionDAG &DAG) const {
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
  if (!Node->isMachineOpcode())
    return Node;
  unsigned Opcode = Node->getMachineOpcode();
  // Null SDValue handed to FoldOperand for operand slots the instruction
  // does not have (no neg/abs/sel/literal operand).
  SDValue FakeOp;

  // Mutable copy of the operand list; FoldOperand rewrites entries in place
  // through the references taken below, so a rebuilt node picks up the folds.
  std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());

  if (Opcode == AMDGPU::DOT_4) {
    // DOT_4 has eight scalar sources (src0/src1 x XYZW), each with its own
    // neg and abs modifier operands but no literal slot (hence FakeOp for
    // the Imm argument below).
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
    };
    for (unsigned i = 0; i < 8; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      // NOTE(review): the "- 1" / HasDst adjustments look like they convert
      // TableGen operand indices (which count the dst operand) into indices
      // into the SDNode operand list (which does not) — confirm against
      // R600InstrInfo::getOperandIdx / getSelIdx.
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue &Abs = Ops[AbsIdx[i] - 1];
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      if (HasDst)
        SelIdx--;
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::REG_SEQUENCE) {
    // REG_SEQUENCE operands come in (value, subreg-index) pairs starting at
    // operand 1; only the value operands are candidates for folding.
    for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
      SDValue &Src = Ops[i];
      if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::CLAMP_R600) {
    // Fold the CLAMP pseudo into the defining instruction by setting that
    // instruction's clamp modifier operand to 1 and returning it in place
    // of the pseudo.
    SDValue Src = Node->getOperand(0);
    if (!Src.isMachineOpcode() ||
        !TII->hasInstrModifiers(Src.getMachineOpcode()))
      return Node;
    int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
        AMDGPU::OpName::clamp);
    if (ClampIdx < 0)
      return Node;
    SDLoc DL(Node);
    // Operand list of the *defining* instruction, not of the CLAMP node.
    std::vector<SDValue> Ops(Src->op_begin(), Src->op_end());
    Ops[ClampIdx - 1] = DAG.getTargetConstant(1, DL, MVT::i32);
    return DAG.getMachineNode(Src.getMachineOpcode(), DL,
                              Node->getVTList(), Ops);
  } else {
    // Generic single-slot ALU instruction: up to three sources, each with
    // neg (and, for src0/src1 only, abs) modifiers plus one shared literal
    // slot.
    if (!TII->hasInstrModifiers(Opcode))
      return Node;
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
      -1   // src2 has no abs modifier
    };
    for (unsigned i = 0; i < 3; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue FakeAbs;
      SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
      if (HasDst) {
        SelIdx--;
        ImmIdx--;
      }
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      SDValue &Imm = Ops[ImmIdx];
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  }

  return Node;
}