//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "AMDGPUFrameLowering.h"
#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Function.h"

using namespace llvm;

R600TargetLowering::R600TargetLowering(TargetMachine &TM,
                                       const AMDGPUSubtarget &STI)
    : AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) {
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
  addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);

  computeRegisterProperties(STI.getRegisterInfo());

  // Set condition code actions
  setCondCodeAction(ISD::SETO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::f32, Expand);

  setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::i32, Expand);

  setOperationAction(ISD::FCOS, MVT::f32, Custom);
  setOperationAction(ISD::FSIN, MVT::f32, Custom);

  setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
  setOperationAction(ISD::SETCC, MVT::v2i32, Expand);

  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
  setOperationAction(ISD::SELECT, MVT::v4i32, Expand);

  // ADD, SUB overflow.
  // TODO: turn these into Legal?
  if (Subtarget->hasCARRY())
    setOperationAction(ISD::UADDO, MVT::i32, Custom);

  if (Subtarget->hasBORROW())
    setOperationAction(ISD::USUBO, MVT::i32, Custom);

  // Expand sign extension of vectors
  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);


  // Legalize loads and stores to the private address space.
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);

  // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
  // spaces, so it is custom lowered to handle those where it isn't.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
  }

  setOperationAction(ISD::STORE, MVT::i8, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);
  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
  setTruncStoreAction(MVT::i32, MVT::i16, Custom);

  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);

  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);

  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::FP_TO_SINT);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::SELECT_CC);
  setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);

  // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
  // to be Legal/Custom in order to avoid library calls.
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);

  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
  for (MVT VT : ScalarIntVTs) {
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::SUBE, VT, Expand);
  }

  setSchedulingPreference(Sched::Source);
}

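// Returns true if the instruction that follows \p I is the RETURN pseudo,
// i.e. the instruction being emitted should set its "end of program" bit.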
static inline bool isEOP(MachineBasicBlock::iterator I) {
  return std::next(I)->getOpcode() == AMDGPU::RETURN;
}

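// Expands the R600 pseudo instructions that requested a custom inserter
// (CLAMP_R600, FABS_R600, FNEG_R600, MOV_IMM_*, CONST_COPY, RAT writes,
// TXD/TXD_SHADOW, branches, exports and RETURN) into real machine
// instructions once the basic blocks have been built.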
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(Subtarget->getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // Replace LDS_*_RET instructions that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI->getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
      // LDS_1A2D support and remove this special case.
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()) ||
          MI->getOpcode() == AMDGPU::LDS_CMPST_RET)
        return BB;

      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
        MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(isEOP(I)); // Set End of program bit
    break;
  }
  case AMDGPU::RAT_STORE_TYPED_eg: {
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addOperand(MI->getOperand(2))
        .addImm(isEOP(I)); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
        .addOperand(MI->getOperand(3))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
        .addOperand(MI->getOperand(2))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW)
        .addReg(T0, RegState::Implicit)
        .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
        .addOperand(MI->getOperand(3))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
        .addOperand(MI->getOperand(2))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW)
        .addReg(T0, RegState::Implicit)
        .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
        .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .addOperand(MI->getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO_INT)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .addOperand(MI->getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if it's not the last one of its type.
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = isEOP(I);
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addOperand(MI->getOperand(2))
        .addOperand(MI->getOperand(3))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5))
        .addOperand(MI->getOperand(6))
        .addImm(CfInst)
        .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  MI->eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
  case ISD::SRA_PARTS:
  case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
  case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
  case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
  case ISD::FCOS:
  case ISD::FSIN: return LowerTrig(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: {
    SDValue Result = LowerLOAD(Op, DAG);
    assert((!Result.getNode() ||
            Result.getNode()->getNumValues() == 2) &&
           "Load should return a value and a chain");
    return Result;
  }

  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::R600_store_swizzle: {
      SDLoc DL(Op);
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, DL, MVT::i32), // SWZ_X
        DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
        DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
        DAG.getConstant(3, DL, MVT::i32) // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::EXPORT, DL, Op.getValueType(), Args);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch(IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_interp_xy:
    case AMDGPUIntrinsic::R600_interp_zw: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      MachineSDNode *interp;
      SDValue RegisterINode = Op.getOperand(2);
      SDValue RegisterJNode = Op.getOperand(3);

      if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
            RegisterJNode, RegisterINode);
      return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
          SDValue(interp, 0), SDValue(interp, 1));
    }
    case AMDGPUIntrinsic::r600_tex:
    case AMDGPUIntrinsic::r600_texc:
    case AMDGPUIntrinsic::r600_txl:
    case AMDGPUIntrinsic::r600_txlc:
    case AMDGPUIntrinsic::r600_txb:
    case AMDGPUIntrinsic::r600_txbc:
    case AMDGPUIntrinsic::r600_txf:
    case AMDGPUIntrinsic::r600_txq:
    case AMDGPUIntrinsic::r600_ddx:
    case AMDGPUIntrinsic::r600_ddy:
    case AMDGPUIntrinsic::R600_ldptr: {
      unsigned TextureOp;
      switch (IntrinsicID) {
      case AMDGPUIntrinsic::r600_tex:
        TextureOp = 0;
        break;
      case AMDGPUIntrinsic::r600_texc:
        TextureOp = 1;
        break;
      case AMDGPUIntrinsic::r600_txl:
        TextureOp = 2;
        break;
      case AMDGPUIntrinsic::r600_txlc:
        TextureOp = 3;
        break;
      case AMDGPUIntrinsic::r600_txb:
        TextureOp = 4;
        break;
      case AMDGPUIntrinsic::r600_txbc:
        TextureOp = 5;
        break;
      case AMDGPUIntrinsic::r600_txf:
        TextureOp = 6;
        break;
      case AMDGPUIntrinsic::r600_txq:
        TextureOp = 7;
        break;
      case AMDGPUIntrinsic::r600_ddx:
        TextureOp = 8;
        break;
      case AMDGPUIntrinsic::r600_ddy:
        TextureOp = 9;
        break;
      case AMDGPUIntrinsic::R600_ldptr:
        TextureOp = 10;
        break;
      default:
        llvm_unreachable("Unknown Texture Operation");
      }

      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, DL, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(1, DL, MVT::i32),
        DAG.getConstant(2, DL, MVT::i32),
        DAG.getConstant(3, DL, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(1, DL, MVT::i32),
        DAG.getConstant(2, DL, MVT::i32),
        DAG.getConstant(3, DL, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
    }
    case AMDGPUIntrinsic::AMDGPU_dp4: {
      SDValue Args[8] = {
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(0, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(0, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(1, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(1, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(2, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(2, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(3, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(3, DL, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
    }

    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    case Intrinsic::r600_read_workdim:
    case AMDGPUIntrinsic::AMDGPU_read_workdim: { // Legacy name.
      uint32_t ByteOffset = getImplicitParameterOffset(MFI, GRID_DIM);
      return LowerImplicitParameter(DAG, VT, DL, ByteOffset / 4);
    }

    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);

    // FIXME: Should be renamed to r600 prefix
    case AMDGPUIntrinsic::AMDGPU_rsq_clamped:
      return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));

    case Intrinsic::r600_rsq:
    case AMDGPUIntrinsic::AMDGPU_rsq: // Legacy name
      // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
      return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
    return;
  case ISD::FP_TO_UINT:
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
      return;
    }
    // Fall-through. Since we don't care about out of bounds values
    // we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint
    // considers some extra cases which are not necessary here.
  case ISD::FP_TO_SINT: {
    SDValue Result;
    if (expandFP_TO_SINT(N, Result, DAG))
      Results.push_back(Result);
    return;
  }
  case ISD::SDIVREM: {
    SDValue Op = SDValue(N, 1);
    SDValue RES = LowerSDIVREM(Op, DAG);
    Results.push_back(RES);
    Results.push_back(RES.getValue(1));
    break;
  }
  case ISD::UDIVREM: {
    SDValue Op = SDValue(N, 0);
    LowerUDIVREM64(Op, DAG, Results);
    break;
  }
  }
}

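// Rebuilds a vector value out of per-element extracts so that it can be
// produced as a BUILD_VERTICAL_VECTOR; used below when INSERT/EXTRACT hit a
// non-constant index.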
SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
                                                   SDValue Vector) const {

  SDLoc DL(Vector);
  EVT VecVT = Vector.getValueType();
  EVT EltVT = VecVT.getVectorElementType();
  SmallVector<SDValue, 8> Args;

  for (unsigned i = 0, e = VecVT.getVectorNumElements();
       i != e; ++i) {
    Args.push_back(DAG.getNode(
        ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
        DAG.getConstant(i, DL, getVectorIdxTy(DAG.getDataLayout()))));
  }

  return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
}

SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                    SelectionDAG &DAG) const {

  SDLoc DL(Op);
  SDValue Vector = Op.getOperand(0);
  SDValue Index = Op.getOperand(1);

  if (isa<ConstantSDNode>(Index) ||
      Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    return Op;

  Vector = vectorToVerticalVector(DAG, Vector);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
                     Vector, Index);
}

SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vector = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Index = Op.getOperand(2);

  if (isa<ConstantSDNode>(Index) ||
      Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    return Op;

  Vector = vectorToVerticalVector(DAG, Vector);
  SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
                               Vector, Value, Index);
  return vectorToVerticalVector(DAG, Insert);
}

SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
  // On hw >= R700, COS/SIN input must be between -1. and 1.
  // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
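  // Illustrative check: Arg = Pi gives FRACT(0.5 + 0.5) - 0.5 = -0.5, which
  // is the same angle as -Pi in the normalized input range.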
  EVT VT = Op.getValueType();
  SDValue Arg = Op.getOperand(0);
  SDLoc DL(Op);

  // TODO: Should this propagate fast-math-flags?
  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT,
        DAG.getNode(ISD::FMUL, DL, VT, Arg,
          DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
        DAG.getConstantFP(0.5, DL, MVT::f32)));
  unsigned TrigNode;
  switch (Op.getOpcode()) {
  case ISD::FCOS:
    TrigNode = AMDGPUISD::COS_HW;
    break;
  case ISD::FSIN:
    TrigNode = AMDGPUISD::SIN_HW;
    break;
  default:
    llvm_unreachable("Wrong trig opcode");
  }
  SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT, FractPart,
        DAG.getConstantFP(-0.5, DL, MVT::f32)));
  if (Gen >= AMDGPUSubtarget::R700)
    return TrigVal;
  // On R600 hw, COS/SIN input must be between -Pi and Pi.
  return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
      DAG.getConstantFP(3.14159265359, DL, MVT::f32));
}

SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One  = DAG.getConstant(1, DL, VT);

  SDValue Width  = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  SDValue BigShift  = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for the 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps; the alternative is to
  // add a conditional to filter out the special case.
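  // Illustrative check: for Shift == 0, CompShift is 31, so the two shifts
  // below move Lo right by a total of 32 bits and Overflow becomes 0 without
  // ever emitting a single (undefined) 32-bit shift.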

  SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
  Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);

  SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
  HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
  SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);

  SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
  SDValue LoBig = Zero;

  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
}

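// Shared lowering for SRL_PARTS and SRA_PARTS; the SRA flag below selects
// arithmetic shifts for the high word and sign-fills the wide-shift case.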
SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One  = DAG.getConstant(1, DL, VT);

  const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;

  SDValue Width  = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  SDValue BigShift  = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for the 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps; the alternative is to
  // add a conditional to filter out the special case.

  SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
  Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);

  SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
  SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
  LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);

  SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
  SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;

  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
}

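// UADDO/USUBO are lowered to the plain ADD/SUB plus the R600 CARRY/BORROW
// node; the overflow flag is sign-extended from i1 and returned as the second
// result.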
SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
                                          unsigned mainop, unsigned ovf) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);

  SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
  // Extend sign.
  OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
                    DAG.getValueType(MVT::i1));

  SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
}

SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(
      ISD::SETCC,
      DL,
      MVT::i1,
      Op, DAG.getConstantFP(0.0f, DL, MVT::f32),
      DAG.getCondCode(ISD::SETNE)
      );
}

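// Implicit kernel parameters (ngroups, global/local sizes, work dimension)
// live at fixed dword offsets in CONSTANT_BUFFER_0 and are materialized as
// constant-address loads from that buffer.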
SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   SDLoc DL,
                                                   unsigned DwordOffset) const {
  unsigned ByteOffset = DwordOffset * 4;
  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                      AMDGPUAS::CONSTANT_BUFFER_0);

  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)),
                     false, false, false, 0);
}

bool R600TargetLowering::isZero(SDValue Op) const {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
    return Cst->isNullValue();
  } else if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CstFP->isZero();
  } else {
    return false;
  }
}

bool R600TargetLowering::isHWTrueValue(SDValue Op) const {
  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CFP->isExactlyValue(1.0);
  }
  return isAllOnesConstant(Op);
}

bool R600TargetLowering::isHWFalseValue(SDValue Op) const {
  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CFP->getValueAPF().isZero();
  }
  return isNullConstant(Op);
}

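// SELECT_CC lowering strategy: first try to match a SET* instruction (the
// select values are hardware true/false constants), then a CND* instruction
// (one compare operand is zero), and otherwise expand into two supported
// SELECT_CC nodes.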
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  if (VT == MVT::f32) {
    DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
    SDValue MinMax = CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
    if (MinMax)
      return MinMax;
  }

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
     ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, DL, CompareVT);
    HWFalse = DAG.getConstant(0, DL, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}

Alp Tokercb402912014-01-24 17:20:08 +00001226/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001227/// convert these pointers to a register index. Each register holds
1228/// 16 bytes (4 x 32-bit sub-registers), but we need to take into account the
1229/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
1230/// for indirect addressing.
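/// For example (illustrative, not part of the original comment): with
/// \p StackWidth == 2 only two 4-byte channels of each register are used, so
/// a byte pointer of 24 maps to register index 24 >> 3 == 3.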
1231SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1232 unsigned StackWidth,
1233 SelectionDAG &DAG) const {
1234 unsigned SRLPad;
1235 switch(StackWidth) {
1236 case 1:
1237 SRLPad = 2;
1238 break;
1239 case 2:
1240 SRLPad = 3;
1241 break;
1242 case 4:
1243 SRLPad = 4;
1244 break;
1245 default: llvm_unreachable("Invalid stack width");
1246 }
1247
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001248 SDLoc DL(Ptr);
1249 return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
1250 DAG.getConstant(SRLPad, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001251}
1252
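/// A summary of the helper below (added for clarity): given the stack width,
/// map element index \p ElemIdx to the register channel to use and the
/// pointer increment to apply before the access. E.g. (illustrative) with
/// StackWidth == 2, elements 0 and 1 use channels 0 and 1 of the current
/// register, while element 2 bumps the pointer by one register and reuses
/// channel 0.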
1253void R600TargetLowering::getStackAddress(unsigned StackWidth,
1254 unsigned ElemIdx,
1255 unsigned &Channel,
1256 unsigned &PtrIncr) const {
1257 switch (StackWidth) {
1258 default:
1259 case 1:
1260 Channel = 0;
1261 if (ElemIdx > 0) {
1262 PtrIncr = 1;
1263 } else {
1264 PtrIncr = 0;
1265 }
1266 break;
1267 case 2:
1268 Channel = ElemIdx % 2;
1269 if (ElemIdx == 2) {
1270 PtrIncr = 1;
1271 } else {
1272 PtrIncr = 0;
1273 }
1274 break;
1275 case 4:
1276 Channel = ElemIdx;
1277 PtrIncr = 0;
1278 break;
1279 }
1280}
1281
Matt Arsenault95245662016-02-11 05:32:46 +00001282SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
1283 SelectionDAG &DAG) const {
1284 SDLoc DL(Store);
Tom Stellard75aadc22012-12-11 21:25:42 +00001285
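  // What follows is, in effect, a read-modify-write of the containing dword
  // (an explanatory summary, not a separate code path): load the 32-bit
  // register holding the byte/short, clear the destination lane with a
  // shifted mask, OR in the shifted value, and store the dword back.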
Matt Arsenault95245662016-02-11 05:32:46 +00001286 unsigned Mask = 0;
1287 if (Store->getMemoryVT() == MVT::i8) {
1288 Mask = 0xff;
1289 } else if (Store->getMemoryVT() == MVT::i16) {
1290 Mask = 0xffff;
1291 }
1292
1293 SDValue Chain = Store->getChain();
1294 SDValue BasePtr = Store->getBasePtr();
1295 EVT MemVT = Store->getMemoryVT();
1296
1297 SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, BasePtr,
1298 DAG.getConstant(2, DL, MVT::i32));
1299 SDValue Dst = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
1300 Chain, Ptr,
1301 DAG.getTargetConstant(0, DL, MVT::i32));
1302
1303 SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, BasePtr,
1304 DAG.getConstant(0x3, DL, MVT::i32));
1305
1306 SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1307 DAG.getConstant(3, DL, MVT::i32));
1308
1309 SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
1310 Store->getValue());
1311
1312 SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);
1313
1314 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
1315 MaskedValue, ShiftAmt);
1316
1317 SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32,
1318 DAG.getConstant(Mask, DL, MVT::i32),
1319 ShiftAmt);
1320 DstMask = DAG.getNode(ISD::XOR, DL, MVT::i32, DstMask,
1321 DAG.getConstant(0xffffffff, DL, MVT::i32));
1322 Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
1323
1324 SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
1325 return DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1326 Chain, Value, Ptr,
1327 DAG.getTargetConstant(0, DL, MVT::i32));
1328}
1329
1330SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1331 if (SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG))
Tom Stellardfbab8272013-08-16 01:12:11 +00001332 return Result;
Tom Stellardfbab8272013-08-16 01:12:11 +00001333
Matt Arsenault95245662016-02-11 05:32:46 +00001334 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1335 unsigned AS = StoreNode->getAddressSpace();
1336 SDValue Value = StoreNode->getValue();
1337 EVT ValueVT = Value.getValueType();
1338
1339 if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
1340 ValueVT.isVector()) {
1341 return SplitVectorStore(Op, DAG);
1342 }
1343
1344 SDLoc DL(Op);
1345 SDValue Chain = StoreNode->getChain();
1346 SDValue Ptr = StoreNode->getBasePtr();
1347
1348 if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001349 if (StoreNode->isTruncatingStore()) {
1350 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001351 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001352 EVT MemVT = StoreNode->getMemoryVT();
1353 SDValue MaskConstant;
1354 if (MemVT == MVT::i8) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001355 MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001356 } else {
1357 assert(MemVT == MVT::i16);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001358 MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001359 }
1360 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001361 DAG.getConstant(2, DL, MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001362 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001363 DAG.getConstant(0x00000003, DL, VT));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001364 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1365 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001366 DAG.getConstant(3, DL, VT));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001367 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1368 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
1369 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1370 // vector instead.
1371 SDValue Src[4] = {
1372 ShiftedValue,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001373 DAG.getConstant(0, DL, MVT::i32),
1374 DAG.getConstant(0, DL, MVT::i32),
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001375 Mask
1376 };
Craig Topper48d114b2014-04-26 18:35:24 +00001377 SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001378 SDValue Args[3] = { Chain, Input, DWordAddr };
1379 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
Craig Topper206fcd42014-04-26 19:29:41 +00001380 Op->getVTList(), Args, MemVT,
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001381 StoreNode->getMemOperand());
1382 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
Matt Arsenault95245662016-02-11 05:32:46 +00001383 ValueVT.bitsGE(MVT::i32)) {
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001384 // Convert pointer from byte address to dword address.
1385 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1386 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001387 Ptr, DAG.getConstant(2, DL, MVT::i32)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001388
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001389 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
Matt Arsenaulteaa3a7e2013-12-10 21:37:42 +00001390 llvm_unreachable("Truncated and indexed stores not supported yet");
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001391 } else {
1392 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1393 }
1394 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001395 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001396 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001397
Matt Arsenault95245662016-02-11 05:32:46 +00001398 if (AS != AMDGPUAS::PRIVATE_ADDRESS)
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001399 return SDValue();
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001400
Matt Arsenault95245662016-02-11 05:32:46 +00001401 EVT MemVT = StoreNode->getMemoryVT();
1402 if (MemVT.bitsLT(MVT::i32))
1403 return lowerPrivateTruncStore(StoreNode, DAG);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001404
Ahmed Bougachaf8dfb472016-02-09 22:54:12 +00001405 // Lowering for indirect addressing
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001406 const MachineFunction &MF = DAG.getMachineFunction();
Eric Christopher7792e322015-01-30 23:24:40 +00001407 const AMDGPUFrameLowering *TFL =
1408 static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001409 unsigned StackWidth = TFL->getStackWidth(MF);
1410
1411 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1412
1413 if (ValueVT.isVector()) {
1414 unsigned NumElemVT = ValueVT.getVectorNumElements();
1415 EVT ElemVT = ValueVT.getVectorElementType();
Craig Topper48d114b2014-04-26 18:35:24 +00001416 SmallVector<SDValue, 4> Stores(NumElemVT);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001417
1418 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1419           "vector width in store");
1420
1421 for (unsigned i = 0; i < NumElemVT; ++i) {
1422 unsigned Channel, PtrIncr;
1423 getStackAddress(StackWidth, i, Channel, PtrIncr);
1424 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001425 DAG.getConstant(PtrIncr, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001426 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001427 Value, DAG.getConstant(i, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001428
1429 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1430 Chain, Elem, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001431 DAG.getTargetConstant(Channel, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001432 }
Craig Topper48d114b2014-04-26 18:35:24 +00001433 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001434 } else {
1435 if (ValueVT == MVT::i8) {
1436 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1437 }
1438 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001439 DAG.getTargetConstant(0, DL, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001440 }
1441
1442 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001443}
1444
Tom Stellard365366f2013-01-23 02:09:06 +00001445// Return 512 + (kc_bank << 12), the base offset of the given constant buffer.
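// e.g. CONSTANT_BUFFER_2 -> 512 + 4096 * 2 = 8704.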
1446static int
1447ConstantAddressBlock(unsigned AddressSpace) {
1448 switch (AddressSpace) {
1449 case AMDGPUAS::CONSTANT_BUFFER_0:
1450 return 512;
1451 case AMDGPUAS::CONSTANT_BUFFER_1:
1452 return 512 + 4096;
1453 case AMDGPUAS::CONSTANT_BUFFER_2:
1454 return 512 + 4096 * 2;
1455 case AMDGPUAS::CONSTANT_BUFFER_3:
1456 return 512 + 4096 * 3;
1457 case AMDGPUAS::CONSTANT_BUFFER_4:
1458 return 512 + 4096 * 4;
1459 case AMDGPUAS::CONSTANT_BUFFER_5:
1460 return 512 + 4096 * 5;
1461 case AMDGPUAS::CONSTANT_BUFFER_6:
1462 return 512 + 4096 * 6;
1463 case AMDGPUAS::CONSTANT_BUFFER_7:
1464 return 512 + 4096 * 7;
1465 case AMDGPUAS::CONSTANT_BUFFER_8:
1466 return 512 + 4096 * 8;
1467 case AMDGPUAS::CONSTANT_BUFFER_9:
1468 return 512 + 4096 * 9;
1469 case AMDGPUAS::CONSTANT_BUFFER_10:
1470 return 512 + 4096 * 10;
1471 case AMDGPUAS::CONSTANT_BUFFER_11:
1472 return 512 + 4096 * 11;
1473 case AMDGPUAS::CONSTANT_BUFFER_12:
1474 return 512 + 4096 * 12;
1475 case AMDGPUAS::CONSTANT_BUFFER_13:
1476 return 512 + 4096 * 13;
1477 case AMDGPUAS::CONSTANT_BUFFER_14:
1478 return 512 + 4096 * 14;
1479 case AMDGPUAS::CONSTANT_BUFFER_15:
1480 return 512 + 4096 * 15;
1481 default:
1482 return -1;
1483 }
1484}
1485
Matt Arsenault6dfda962016-02-10 18:21:39 +00001486SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
1487 SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001488 SDLoc DL(Op);
Matt Arsenault6dfda962016-02-10 18:21:39 +00001489 LoadSDNode *Load = cast<LoadSDNode>(Op);
1490 ISD::LoadExtType ExtType = Load->getExtensionType();
1491 EVT MemVT = Load->getMemoryVT();
Tom Stellard365366f2013-01-23 02:09:06 +00001492
Matt Arsenault6dfda962016-02-10 18:21:39 +00001493  // On pre-SI (R600-class) hardware, a private-address extload narrower than
1494  // 32 bits is emulated with a byte/short extract from the containing register.
1495
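  // e.g. (illustrative): an i8 load from byte address 13 reads the dword at
  // index 13 >> 2 == 3, shifts it right by (13 & 3) * 8 == 8 bits, and then
  // sign- or zero-extends the low 8 bits.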
1496 // Get Register holding the target.
1497 SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Load->getBasePtr(),
1498 DAG.getConstant(2, DL, MVT::i32));
1499 // Load the Register.
1500 SDValue Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(),
1501 Load->getChain(),
1502 Ptr,
1503 DAG.getTargetConstant(0, DL, MVT::i32),
1504 Op.getOperand(2));
1505
1506 // Get offset within the register.
1507 SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
1508 Load->getBasePtr(),
1509 DAG.getConstant(0x3, DL, MVT::i32));
1510
1511 // Bit offset of target byte (byteIdx * 8).
1512 SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1513 DAG.getConstant(3, DL, MVT::i32));
1514
1515 // Shift to the right.
1516 Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Ret, ShiftAmt);
1517
1518 // Eliminate the upper bits by setting them to ...
1519 EVT MemEltVT = MemVT.getScalarType();
1520
1521 // ... ones.
1522 if (ExtType == ISD::SEXTLOAD) {
1523 SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
1524
1525 SDValue Ops[] = {
1526 DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode),
1527 Load->getChain()
1528 };
1529
1530 return DAG.getMergeValues(Ops, DL);
1531 }
1532
1533 // ... or zeros.
1534 SDValue Ops[] = {
1535 DAG.getZeroExtendInReg(Ret, DL, MemEltVT),
1536 Load->getChain()
1537 };
1538
1539 return DAG.getMergeValues(Ops, DL);
1540}
1541
1542SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1543 LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
1544 unsigned AS = LoadNode->getAddressSpace();
1545 EVT MemVT = LoadNode->getMemoryVT();
1546 ISD::LoadExtType ExtType = LoadNode->getExtensionType();
1547
1548 if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
1549 ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
1550 return lowerPrivateExtLoad(Op, DAG);
1551 }
1552
1553 SDLoc DL(Op);
1554 EVT VT = Op.getValueType();
1555 SDValue Chain = LoadNode->getChain();
1556 SDValue Ptr = LoadNode->getBasePtr();
Tom Stellarde9373602014-01-22 19:24:14 +00001557
Tom Stellard067c8152014-07-21 14:01:14 +00001558  // Lower loads of constant address space global variables
1559 if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
Mehdi Aminia28d91d2015-03-10 02:37:25 +00001560 isa<GlobalVariable>(GetUnderlyingObject(
Mehdi Amini44ede332015-07-09 02:09:04 +00001561 LoadNode->getMemOperand()->getValue(), DAG.getDataLayout()))) {
Tom Stellard067c8152014-07-21 14:01:14 +00001562
Mehdi Amini44ede332015-07-09 02:09:04 +00001563 SDValue Ptr = DAG.getZExtOrTrunc(
1564 LoadNode->getBasePtr(), DL,
1565 getPointerTy(DAG.getDataLayout(), AMDGPUAS::PRIVATE_ADDRESS));
Tom Stellard067c8152014-07-21 14:01:14 +00001566 Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001567 DAG.getConstant(2, DL, MVT::i32));
Tom Stellard067c8152014-07-21 14:01:14 +00001568 return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op->getVTList(),
1569 LoadNode->getChain(), Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001570 DAG.getTargetConstant(0, DL, MVT::i32),
1571 Op.getOperand(2));
Tom Stellard067c8152014-07-21 14:01:14 +00001572 }
Tom Stellarde9373602014-01-22 19:24:14 +00001573
Tom Stellard35bb18c2013-08-26 15:06:04 +00001574 if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
1575 SDValue MergedValues[2] = {
Matt Arsenault83e60582014-07-24 17:10:35 +00001576 ScalarizeVectorLoad(Op, DAG),
Tom Stellard35bb18c2013-08-26 15:06:04 +00001577 Chain
1578 };
Craig Topper64941d92014-04-27 19:20:57 +00001579 return DAG.getMergeValues(MergedValues, DL);
Tom Stellard35bb18c2013-08-26 15:06:04 +00001580 }
1581
Tom Stellard365366f2013-01-23 02:09:06 +00001582 int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
Matt Arsenault00a0d6f2013-11-13 02:39:07 +00001583 if (ConstantBlock > -1 &&
1584 ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
1585 (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
Tom Stellard365366f2013-01-23 02:09:06 +00001586 SDValue Result;
Nick Lewyckyaad475b2014-04-15 07:22:52 +00001587 if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
1588 isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
Matt Arsenaultef1a9502013-11-01 17:39:26 +00001589 isa<ConstantSDNode>(Ptr)) {
Tom Stellard365366f2013-01-23 02:09:06 +00001590 SDValue Slots[4];
1591 for (unsigned i = 0; i < 4; i++) {
1592        // We want the Const position encoded with the following formula:
1593        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
1594        // where const_index is Ptr / 16, since LLVM lays these constants out
1595        // with an alignment of 16. Thus we add (512 + (kc_bank << 12)) * 16 +
1596        // chan * 4 to the byte pointer here and then divide by 4 at the ISel step.
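        // e.g. (illustrative) kc_bank 0, Ptr 32, chan 1:
        // 32 + 512 * 16 + 1 * 4 = 8228, and 8228 / 4 = 2057 = ((512 + 2) << 2) + 1.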
1597 SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001598 DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
Tom Stellard365366f2013-01-23 02:09:06 +00001599 Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
1600 }
Tom Stellard0344cdf2013-08-01 15:23:42 +00001601 EVT NewVT = MVT::v4i32;
1602 unsigned NumElements = 4;
1603 if (VT.isVector()) {
1604 NewVT = VT;
1605 NumElements = VT.getVectorNumElements();
1606 }
Craig Topper48d114b2014-04-26 18:35:24 +00001607 Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT,
Craig Topper2d2aa0c2014-04-30 07:17:30 +00001608 makeArrayRef(Slots, NumElements));
Tom Stellard365366f2013-01-23 02:09:06 +00001609 } else {
Alp Tokerf907b892013-12-05 05:44:44 +00001610      // A non-constant ptr can't be folded; keep it as a v4i32 CONST_ADDRESS load.
Tom Stellard365366f2013-01-23 02:09:06 +00001611 Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001612 DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
1613 DAG.getConstant(4, DL, MVT::i32)),
1614 DAG.getConstant(LoadNode->getAddressSpace() -
1615 AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
Tom Stellard365366f2013-01-23 02:09:06 +00001616 );
1617 }
1618
1619 if (!VT.isVector()) {
1620 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001621 DAG.getConstant(0, DL, MVT::i32));
Tom Stellard365366f2013-01-23 02:09:06 +00001622 }
1623
1624 SDValue MergedValues[2] = {
Matt Arsenault7939acd2014-04-07 16:44:24 +00001625 Result,
1626 Chain
Tom Stellard365366f2013-01-23 02:09:06 +00001627 };
Craig Topper64941d92014-04-27 19:20:57 +00001628 return DAG.getMergeValues(MergedValues, DL);
Tom Stellard365366f2013-01-23 02:09:06 +00001629 }
1630
Matt Arsenault6dfda962016-02-10 18:21:39 +00001631 SDValue LoweredLoad;
1632
Matt Arsenault909d0c02013-10-30 23:43:29 +00001633  // For most operations, returning SDValue() will result in the node being
1634 // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
1635 // need to manually expand loads that may be legal in some address spaces and
1636 // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
1637 // compute shaders, since the data is sign extended when it is uploaded to the
1638 // buffer. However SEXT loads from other address spaces are not supported, so
1639 // we need to expand them here.
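  // A sketch of the expansion below: a sextload of MemVT becomes
  //   (sign_extend_inreg (extload MemVT), MemVT)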
Tom Stellard84021442013-07-23 01:48:24 +00001640 if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
1641 EVT MemVT = LoadNode->getMemoryVT();
1642 assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
Tom Stellard84021442013-07-23 01:48:24 +00001643 SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
1644 LoadNode->getPointerInfo(), MemVT,
1645 LoadNode->isVolatile(),
1646 LoadNode->isNonTemporal(),
Louis Gerbarg67474e32014-07-31 21:45:05 +00001647 LoadNode->isInvariant(),
Tom Stellard84021442013-07-23 01:48:24 +00001648 LoadNode->getAlignment());
Jan Veselyb670d372015-05-26 18:07:22 +00001649 SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
1650 DAG.getValueType(MemVT));
Tom Stellard84021442013-07-23 01:48:24 +00001651
Jan Veselyb670d372015-05-26 18:07:22 +00001652 SDValue MergedValues[2] = { Res, Chain };
Craig Topper64941d92014-04-27 19:20:57 +00001653 return DAG.getMergeValues(MergedValues, DL);
Tom Stellard84021442013-07-23 01:48:24 +00001654 }
1655
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001656 if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1657 return SDValue();
1658 }
1659
1660 // Lowering for indirect addressing
1661 const MachineFunction &MF = DAG.getMachineFunction();
Eric Christopher7792e322015-01-30 23:24:40 +00001662 const AMDGPUFrameLowering *TFL =
1663 static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001664 unsigned StackWidth = TFL->getStackWidth(MF);
1665
1666 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1667
1668 if (VT.isVector()) {
1669 unsigned NumElemVT = VT.getVectorNumElements();
1670 EVT ElemVT = VT.getVectorElementType();
1671 SDValue Loads[4];
1672
1673 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1674 "vector width in load");
1675
1676 for (unsigned i = 0; i < NumElemVT; ++i) {
1677 unsigned Channel, PtrIncr;
1678 getStackAddress(StackWidth, i, Channel, PtrIncr);
1679 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001680 DAG.getConstant(PtrIncr, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001681 Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
1682 Chain, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001683 DAG.getTargetConstant(Channel, DL, MVT::i32),
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001684 Op.getOperand(2));
1685 }
1686 for (unsigned i = NumElemVT; i < 4; ++i) {
1687 Loads[i] = DAG.getUNDEF(ElemVT);
1688 }
1689 EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
Craig Topper48d114b2014-04-26 18:35:24 +00001690 LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001691 } else {
1692 LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
1693 Chain, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001694 DAG.getTargetConstant(0, DL, MVT::i32), // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001695 Op.getOperand(2));
1696 }
1697
Matt Arsenault7939acd2014-04-07 16:44:24 +00001698 SDValue Ops[2] = {
1699 LoweredLoad,
1700 Chain
1701 };
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001702
Craig Topper64941d92014-04-27 19:20:57 +00001703 return DAG.getMergeValues(Ops, DL);
Tom Stellard365366f2013-01-23 02:09:06 +00001704}
Tom Stellard75aadc22012-12-11 21:25:42 +00001705
Matt Arsenault1d555c42014-06-23 18:00:55 +00001706SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1707 SDValue Chain = Op.getOperand(0);
1708 SDValue Cond = Op.getOperand(1);
1709 SDValue Jump = Op.getOperand(2);
1710
1711 return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1712 Chain, Jump, Cond);
1713}
1714
Matt Arsenault81d06012016-03-07 21:10:13 +00001715SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
1716 SelectionDAG &DAG) const {
1717 MachineFunction &MF = DAG.getMachineFunction();
1718 const AMDGPUFrameLowering *TFL = Subtarget->getFrameLowering();
1719
1720 FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
1721
1722 unsigned FrameIndex = FIN->getIndex();
1723 unsigned IgnoredFrameReg;
1724 unsigned Offset =
1725 TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
1726 return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), SDLoc(Op),
1727 Op.getValueType());
1728}
1729
Tom Stellard75aadc22012-12-11 21:25:42 +00001730/// XXX Only kernel functions are supported, so we can assume for now that
1731/// every function is a kernel function, but in the future we should use
1732/// separate calling conventions for kernel and non-kernel functions.
1733SDValue R600TargetLowering::LowerFormalArguments(
1734 SDValue Chain,
1735 CallingConv::ID CallConv,
1736 bool isVarArg,
1737 const SmallVectorImpl<ISD::InputArg> &Ins,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001738 SDLoc DL, SelectionDAG &DAG,
Tom Stellard75aadc22012-12-11 21:25:42 +00001739 SmallVectorImpl<SDValue> &InVals) const {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001740 SmallVector<CCValAssign, 16> ArgLocs;
Eric Christopherb5217502014-08-06 18:45:26 +00001741 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1742 *DAG.getContext());
Vincent Lejeunef143af32013-11-11 22:10:24 +00001743 MachineFunction &MF = DAG.getMachineFunction();
Jan Veselye5121f32014-10-14 20:05:26 +00001744 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellardacfeebf2013-07-23 01:48:05 +00001745
Tom Stellardaf775432013-10-23 00:44:32 +00001746 SmallVector<ISD::InputArg, 8> LocalIns;
1747
Matt Arsenault209a7b92014-04-18 07:40:20 +00001748 getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);
Tom Stellardaf775432013-10-23 00:44:32 +00001749
1750 AnalyzeFormalArguments(CCInfo, LocalIns);
Tom Stellardacfeebf2013-07-23 01:48:05 +00001751
Tom Stellard1e803092013-07-23 01:48:18 +00001752 for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001753 CCValAssign &VA = ArgLocs[i];
Matt Arsenault74ef2772014-08-13 18:14:11 +00001754 const ISD::InputArg &In = Ins[i];
1755 EVT VT = In.VT;
1756 EVT MemVT = VA.getLocVT();
1757 if (!VT.isVector() && MemVT.isVector()) {
1758 // Get load source type if scalarized.
1759 MemVT = MemVT.getVectorElementType();
1760 }
Tom Stellard78e01292013-07-23 01:47:58 +00001761
Nicolai Haehnledf3a20c2016-04-06 19:40:20 +00001762 if (AMDGPU::isShader(CallConv)) {
Vincent Lejeunef143af32013-11-11 22:10:24 +00001763 unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
1764 SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
1765 InVals.push_back(Register);
1766 continue;
1767 }
1768
Tom Stellard75aadc22012-12-11 21:25:42 +00001769 PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Matt Arsenault74ef2772014-08-13 18:14:11 +00001770 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellardacfeebf2013-07-23 01:48:05 +00001771
Matt Arsenaultfae02982014-03-17 18:58:11 +00001772 // i64 isn't a legal type, so the register type used ends up as i32, which
1773 // isn't expected here. It attempts to create this sextload, but it ends up
1774 // being invalid. Somehow this seems to work with i64 arguments, but breaks
1775 // for <1 x i64>.
1776
Tom Stellardacfeebf2013-07-23 01:48:05 +00001777    // The first 36 bytes of the input buffer contain information about
1778 // thread group and global sizes.
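    // (An illustrative consequence of the code below: the first kernel
    // argument is loaded from constant buffer offset 36.)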
Matt Arsenault74ef2772014-08-13 18:14:11 +00001779 ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
1780 if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
1781 // FIXME: This should really check the extload type, but the handling of
1782 // extload vector parameters seems to be broken.
Matt Arsenaulte1f030c2014-04-11 20:59:54 +00001783
Matt Arsenault74ef2772014-08-13 18:14:11 +00001784 // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
1785 Ext = ISD::SEXTLOAD;
1786 }
1787
1788 // Compute the offset from the value.
1789 // XXX - I think PartOffset should give you this, but it seems to give the
1790 // size of the register which isn't useful.
1791
Andrew Trick05938a52015-02-16 18:10:47 +00001792 unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
Matt Arsenault74ef2772014-08-13 18:14:11 +00001793 unsigned PartOffset = VA.getLocMemOffset();
Jan Veselye5121f32014-10-14 20:05:26 +00001794 unsigned Offset = 36 + VA.getLocMemOffset();
Matt Arsenault74ef2772014-08-13 18:14:11 +00001795
1796 MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
1797 SDValue Arg = DAG.getLoad(ISD::UNINDEXED, Ext, VT, DL, Chain,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001798 DAG.getConstant(Offset, DL, MVT::i32),
Matt Arsenault74ef2772014-08-13 18:14:11 +00001799 DAG.getUNDEF(MVT::i32),
1800 PtrInfo,
1801 MemVT, false, true, true, 4);
Matt Arsenault209a7b92014-04-18 07:40:20 +00001802
1803 // 4 is the preferred alignment for the CONSTANT memory space.
Tom Stellard75aadc22012-12-11 21:25:42 +00001804 InVals.push_back(Arg);
Jan Veselye5121f32014-10-14 20:05:26 +00001805 MFI->ABIArgOffset = Offset + MemVT.getStoreSize();
Tom Stellard75aadc22012-12-11 21:25:42 +00001806 }
1807 return Chain;
1808}
1809
Mehdi Amini44ede332015-07-09 02:09:04 +00001810EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1811 EVT VT) const {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001812 if (!VT.isVector())
1813 return MVT::i32;
Tom Stellard75aadc22012-12-11 21:25:42 +00001814 return VT.changeVectorElementTypeToInteger();
1815}
1816
Matt Arsenaultfa67bdb2016-02-22 21:04:16 +00001817bool R600TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
1818 unsigned AddrSpace,
1819 unsigned Align,
1820 bool *IsFast) const {
1821 if (IsFast)
1822 *IsFast = false;
1823
1824 if (!VT.isSimple() || VT == MVT::Other)
1825 return false;
1826
1827 if (VT.bitsLT(MVT::i32))
1828 return false;
1829
1830 // TODO: This is a rough estimate.
1831 if (IsFast)
1832 *IsFast = true;
1833
1834 return VT.bitsGT(MVT::i32) && Align % 4 == 0;
1835}
1836
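/// A summary of the helper below (added for clarity): fold "trivial"
/// BUILD_VECTOR lanes into swizzle selects: undef lanes become SEL_MASK_WRITE,
/// 0.0 becomes SEL_0, 1.0 becomes SEL_1, and duplicate lanes are remapped onto
/// their first occurrence. E.g. (illustrative) build_vector x, x, 0.0, undef
/// yields RemapSwizzle = {1 -> 0, 2 -> SEL_0, 3 -> SEL_MASK_WRITE}.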
Matt Arsenault209a7b92014-04-18 07:40:20 +00001837static SDValue CompactSwizzlableVector(
1838 SelectionDAG &DAG, SDValue VectorEntry,
1839 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001840 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1841 assert(RemapSwizzle.empty());
1842 SDValue NewBldVec[4] = {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001843 VectorEntry.getOperand(0),
1844 VectorEntry.getOperand(1),
1845 VectorEntry.getOperand(2),
1846 VectorEntry.getOperand(3)
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001847 };
1848
1849 for (unsigned i = 0; i < 4; i++) {
Sanjay Patel57195842016-03-14 17:28:46 +00001850 if (NewBldVec[i].isUndef())
Vincent Lejeunefa58a5f2013-10-13 17:56:10 +00001851      // We use the write mask here to teach later passes that the ith element of
1852      // this vector is undef. Thus we can reduce 128-bit register usage, break
1853      // false dependencies and additionally make the assembly easier to read.
1854 RemapSwizzle[i] = 7; // SEL_MASK_WRITE
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001855 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1856 if (C->isZero()) {
1857 RemapSwizzle[i] = 4; // SEL_0
1858 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1859 } else if (C->isExactlyValue(1.0)) {
1860 RemapSwizzle[i] = 5; // SEL_1
1861 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1862 }
1863 }
1864
Sanjay Patel57195842016-03-14 17:28:46 +00001865 if (NewBldVec[i].isUndef())
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001866 continue;
1867 for (unsigned j = 0; j < i; j++) {
1868 if (NewBldVec[i] == NewBldVec[j]) {
1869 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1870 RemapSwizzle[i] = j;
1871 break;
1872 }
1873 }
1874 }
1875
1876 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001877 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001878}
1879
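/// A rough description of the helper below: RemapSwizzle starts as the
/// identity; if some lane i holds an extract_vector_elt of source index Idx
/// and lane Idx is not already pinned to its own source index, lanes i and
/// Idx are swapped and the swap is recorded in RemapSwizzle.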
Benjamin Kramer193960c2013-06-11 13:32:25 +00001880static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1881 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001882 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1883 assert(RemapSwizzle.empty());
1884 SDValue NewBldVec[4] = {
1885 VectorEntry.getOperand(0),
1886 VectorEntry.getOperand(1),
1887 VectorEntry.getOperand(2),
1888 VectorEntry.getOperand(3)
1889 };
1890 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001891 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001892 RemapSwizzle[i] = i;
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001893 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1894 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1895 ->getZExtValue();
1896 if (i == Idx)
1897 isUnmovable[Idx] = true;
1898 }
1899 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001900
1901 for (unsigned i = 0; i < 4; i++) {
1902 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1903 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1904 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001905 if (isUnmovable[Idx])
1906 continue;
1907 // Swap i and Idx
1908 std::swap(NewBldVec[Idx], NewBldVec[i]);
1909 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1910 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001911 }
1912 }
1913
1914 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001915 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001916}
1917
1918
1919SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001920 SDValue Swz[4], SelectionDAG &DAG,
1921 SDLoc DL) const {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001922 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1923 // Old -> New swizzle values
1924 DenseMap<unsigned, unsigned> SwizzleRemap;
1925
1926 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1927 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001928 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001929 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001930 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001931 }
1932
1933 SwizzleRemap.clear();
1934 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1935 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001936 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001937 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001938 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001939 }
1940
1941 return BuildVector;
1942}
1943
1944
Tom Stellard75aadc22012-12-11 21:25:42 +00001945//===----------------------------------------------------------------------===//
1946// Custom DAG Optimizations
1947//===----------------------------------------------------------------------===//
1948
1949SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1950 DAGCombinerInfo &DCI) const {
1951 SelectionDAG &DAG = DCI.DAG;
1952
1953 switch (N->getOpcode()) {
Tom Stellard50122a52014-04-07 19:45:41 +00001954 default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00001955 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1956 case ISD::FP_ROUND: {
1957 SDValue Arg = N->getOperand(0);
1958 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001959 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001960 Arg.getOperand(0));
1961 }
1962 break;
1963 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001964
1965 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1966 // (i32 select_cc f32, f32, -1, 0 cc)
1967 //
1968 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1969 // this to one of the SET*_DX10 instructions.
1970 case ISD::FP_TO_SINT: {
1971 SDValue FNeg = N->getOperand(0);
1972 if (FNeg.getOpcode() != ISD::FNEG) {
1973 return SDValue();
1974 }
1975 SDValue SelectCC = FNeg.getOperand(0);
1976 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1977 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1978 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1979 !isHWTrueValue(SelectCC.getOperand(2)) ||
1980 !isHWFalseValue(SelectCC.getOperand(3))) {
1981 return SDValue();
1982 }
1983
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001984 SDLoc dl(N);
1985 return DAG.getNode(ISD::SELECT_CC, dl, N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001986 SelectCC.getOperand(0), // LHS
1987 SelectCC.getOperand(1), // RHS
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001988 DAG.getConstant(-1, dl, MVT::i32), // True
1989 DAG.getConstant(0, dl, MVT::i32), // False
Tom Stellarde06163a2013-02-07 14:02:35 +00001990 SelectCC.getOperand(4)); // CC
1991
1992 break;
1993 }
Quentin Colombete2e05482013-07-30 00:27:16 +00001994
NAKAMURA Takumi8a046432013-10-28 04:07:38 +00001995 // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1996 // => build_vector elt0, ... , NewEltIdx, ... , eltN
Quentin Colombete2e05482013-07-30 00:27:16 +00001997 case ISD::INSERT_VECTOR_ELT: {
1998 SDValue InVec = N->getOperand(0);
1999 SDValue InVal = N->getOperand(1);
2000 SDValue EltNo = N->getOperand(2);
2001 SDLoc dl(N);
2002
2003 // If the inserted element is an UNDEF, just use the input vector.
Sanjay Patel57195842016-03-14 17:28:46 +00002004 if (InVal.isUndef())
Quentin Colombete2e05482013-07-30 00:27:16 +00002005 return InVec;
2006
2007 EVT VT = InVec.getValueType();
2008
2009 // If we can't generate a legal BUILD_VECTOR, exit
2010 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
2011 return SDValue();
2012
2013 // Check that we know which element is being inserted
2014 if (!isa<ConstantSDNode>(EltNo))
2015 return SDValue();
2016 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
2017
2018 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
2019 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
2020 // vector elements.
2021 SmallVector<SDValue, 8> Ops;
2022 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
2023 Ops.append(InVec.getNode()->op_begin(),
2024 InVec.getNode()->op_end());
Sanjay Patel57195842016-03-14 17:28:46 +00002025 } else if (InVec.isUndef()) {
Quentin Colombete2e05482013-07-30 00:27:16 +00002026 unsigned NElts = VT.getVectorNumElements();
2027 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
2028 } else {
2029 return SDValue();
2030 }
2031
2032 // Insert the element
2033 if (Elt < Ops.size()) {
2034 // All the operands of BUILD_VECTOR must have the same type;
2035 // we enforce that here.
2036 EVT OpVT = Ops[0].getValueType();
2037 if (InVal.getValueType() != OpVT)
2038 InVal = OpVT.bitsGT(InVal.getValueType()) ?
2039 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
2040 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
2041 Ops[Elt] = InVal;
2042 }
2043
2044 // Return the new vector
Craig Topper48d114b2014-04-26 18:35:24 +00002045 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
Quentin Colombete2e05482013-07-30 00:27:16 +00002046 }
2047
Tom Stellard365366f2013-01-23 02:09:06 +00002048 // Extract_vec (Build_vector) generated by custom lowering
2049  // also needs to be custom combined
2050 case ISD::EXTRACT_VECTOR_ELT: {
2051 SDValue Arg = N->getOperand(0);
2052 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
2053 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2054 unsigned Element = Const->getZExtValue();
2055 return Arg->getOperand(Element);
2056 }
2057 }
Tom Stellarddd04c832013-01-31 22:11:53 +00002058 if (Arg.getOpcode() == ISD::BITCAST &&
2059 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
2060 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2061 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00002062 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00002063 Arg->getOperand(0).getOperand(Element));
2064 }
2065 }
Mehdi Aminie029eae2015-07-16 06:23:12 +00002066 break;
Tom Stellard365366f2013-01-23 02:09:06 +00002067 }
Tom Stellarde06163a2013-02-07 14:02:35 +00002068
2069 case ISD::SELECT_CC: {
Tom Stellardafa8b532014-05-09 16:42:16 +00002070 // Try common optimizations
Ahmed Bougachaf8dfb472016-02-09 22:54:12 +00002071 if (SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI))
Tom Stellardafa8b532014-05-09 16:42:16 +00002072 return Ret;
2073
Tom Stellarde06163a2013-02-07 14:02:35 +00002074 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
2075 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00002076 //
2077 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
2078 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00002079 SDValue LHS = N->getOperand(0);
2080 if (LHS.getOpcode() != ISD::SELECT_CC) {
2081 return SDValue();
2082 }
2083
2084 SDValue RHS = N->getOperand(1);
2085 SDValue True = N->getOperand(2);
2086 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00002087 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00002088
2089 if (LHS.getOperand(2).getNode() != True.getNode() ||
2090 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00002091 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00002092 return SDValue();
2093 }
2094
Tom Stellard5e524892013-03-08 15:37:11 +00002095 switch (NCC) {
2096 default: return SDValue();
2097 case ISD::SETNE: return LHS;
2098 case ISD::SETEQ: {
2099 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
2100 LHSCC = ISD::getSetCCInverse(LHSCC,
2101 LHS.getOperand(0).getValueType().isInteger());
Tom Stellardcd428182013-09-28 02:50:38 +00002102 if (DCI.isBeforeLegalizeOps() ||
2103 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
2104 return DAG.getSelectCC(SDLoc(N),
2105 LHS.getOperand(0),
2106 LHS.getOperand(1),
2107 LHS.getOperand(2),
2108 LHS.getOperand(3),
2109 LHSCC);
2110 break;
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002111 }
Tom Stellard5e524892013-03-08 15:37:11 +00002112 }
Tom Stellardcd428182013-09-28 02:50:38 +00002113 return SDValue();
Tom Stellard5e524892013-03-08 15:37:11 +00002114 }
Tom Stellardfbab8272013-08-16 01:12:11 +00002115
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002116 case AMDGPUISD::EXPORT: {
2117 SDValue Arg = N->getOperand(1);
2118 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2119 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002120
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002121 SDValue NewArgs[8] = {
2122 N->getOperand(0), // Chain
2123 SDValue(),
2124 N->getOperand(2), // ArrayBase
2125 N->getOperand(3), // Type
2126 N->getOperand(4), // SWZ_X
2127 N->getOperand(5), // SWZ_Y
2128 N->getOperand(6), // SWZ_Z
2129 N->getOperand(7) // SWZ_W
2130 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00002131 SDLoc DL(N);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002132 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
Craig Topper48d114b2014-04-26 18:35:24 +00002133 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
Tom Stellarde06163a2013-02-07 14:02:35 +00002134 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002135 case AMDGPUISD::TEXTURE_FETCH: {
2136 SDValue Arg = N->getOperand(1);
2137 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2138 break;
2139
2140 SDValue NewArgs[19] = {
2141 N->getOperand(0),
2142 N->getOperand(1),
2143 N->getOperand(2),
2144 N->getOperand(3),
2145 N->getOperand(4),
2146 N->getOperand(5),
2147 N->getOperand(6),
2148 N->getOperand(7),
2149 N->getOperand(8),
2150 N->getOperand(9),
2151 N->getOperand(10),
2152 N->getOperand(11),
2153 N->getOperand(12),
2154 N->getOperand(13),
2155 N->getOperand(14),
2156 N->getOperand(15),
2157 N->getOperand(16),
2158 N->getOperand(17),
2159 N->getOperand(18),
2160 };
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002161 SDLoc DL(N);
2162 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
2163 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002164 }
Tom Stellard75aadc22012-12-11 21:25:42 +00002165 }
Matt Arsenault5565f65e2014-05-22 18:09:07 +00002166
2167 return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00002168}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002169
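/// A summary of the folding cases handled below (added for clarity): try to
/// fold the source operand Src of ParentNode into the instruction's operand
/// fields: FNEG/FABS set the neg/abs bits, CONST_COPY becomes an ALU_CONST
/// register plus a sel index (subject to the constant-read limitations), and
/// MOV_IMM_* becomes an inline constant register or the single ALU_LITERAL_X
/// literal slot.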
2170static bool
2171FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002172 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Eric Christopherfc6de422014-08-05 02:39:49 +00002173 const R600InstrInfo *TII =
2174 static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002175 if (!Src.isMachineOpcode())
2176 return false;
2177 switch (Src.getMachineOpcode()) {
2178 case AMDGPU::FNEG_R600:
2179 if (!Neg.getNode())
2180 return false;
2181 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002182 Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002183 return true;
2184 case AMDGPU::FABS_R600:
2185 if (!Abs.getNode())
2186 return false;
2187 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002188 Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002189 return true;
2190 case AMDGPU::CONST_COPY: {
2191 unsigned Opcode = ParentNode->getMachineOpcode();
2192 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2193
2194 if (!Sel.getNode())
2195 return false;
2196
2197 SDValue CstOffset = Src.getOperand(0);
2198 if (ParentNode->getValueType(0).isVector())
2199 return false;
2200
2201    // Gather constant values
2202 int SrcIndices[] = {
2203 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2204 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2205 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
2206 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2207 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2208 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2209 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2210 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2211 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2212 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2213 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
2214 };
2215 std::vector<unsigned> Consts;
Matt Arsenault4d64f962014-05-12 19:23:21 +00002216 for (int OtherSrcIdx : SrcIndices) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002217 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2218 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2219 continue;
2220 if (HasDst) {
2221 OtherSrcIdx--;
2222 OtherSelIdx--;
2223 }
2224 if (RegisterSDNode *Reg =
2225 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2226 if (Reg->getReg() == AMDGPU::ALU_CONST) {
Matt Arsenaultb3ee3882014-05-12 19:26:38 +00002227 ConstantSDNode *Cst
2228 = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002229 Consts.push_back(Cst->getZExtValue());
2230 }
2231 }
2232 }
2233
Matt Arsenault37c12d72014-05-12 20:42:57 +00002234 ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002235 Consts.push_back(Cst->getZExtValue());
2236 if (!TII->fitsConstReadLimitations(Consts)) {
2237 return false;
2238 }
2239
2240 Sel = CstOffset;
2241 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
2242 return true;
2243 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002244 case AMDGPU::MOV_IMM_I32:
2245 case AMDGPU::MOV_IMM_F32: {
2246 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
2247 uint64_t ImmValue = 0;
2248
2249
2250 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
2251 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
2252 float FloatValue = FPC->getValueAPF().convertToFloat();
2253 if (FloatValue == 0.0) {
2254 ImmReg = AMDGPU::ZERO;
2255 } else if (FloatValue == 0.5) {
2256 ImmReg = AMDGPU::HALF;
2257 } else if (FloatValue == 1.0) {
2258 ImmReg = AMDGPU::ONE;
2259 } else {
2260 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2261 }
2262 } else {
2263 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
2264 uint64_t Value = C->getZExtValue();
2265 if (Value == 0) {
2266 ImmReg = AMDGPU::ZERO;
2267 } else if (Value == 1) {
2268 ImmReg = AMDGPU::ONE_INT;
2269 } else {
2270 ImmValue = Value;
2271 }
2272 }
2273
2274 // Check that we aren't already using an immediate.
2275 // XXX: It's possible for an instruction to have more than one
2276 // immediate operand, but this is not supported yet.
2277 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
2278 if (!Imm.getNode())
2279 return false;
2280 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
2281 assert(C);
2282 if (C->getZExtValue())
2283 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002284 Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002285 }
2286 Src = DAG.getRegister(ImmReg, MVT::i32);
2287 return true;
2288 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002289 default:
2290 return false;
2291 }
2292}
2293
2294
2295/// \brief Fold the instructions after selecting them
2296SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2297 SelectionDAG &DAG) const {
Eric Christopherfc6de422014-08-05 02:39:49 +00002298 const R600InstrInfo *TII =
2299 static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002300 if (!Node->isMachineOpcode())
2301 return Node;
2302 unsigned Opcode = Node->getMachineOpcode();
2303 SDValue FakeOp;
2304
Benjamin Kramer6cd780f2015-02-17 15:29:18 +00002305 std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002306
2307 if (Opcode == AMDGPU::DOT_4) {
2308 int OperandIdx[] = {
2309 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2310 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2311 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2312 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2313 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2314 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2315 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2316 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00002317 };
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002318 int NegIdx[] = {
2319 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
2320 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
2321 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
2322 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
2323 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
2324 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
2325 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
2326 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
2327 };
2328 int AbsIdx[] = {
2329 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
2330 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
2331 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
2332 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
2333 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
2334 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
2335 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
2336 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
2337 };
2338 for (unsigned i = 0; i < 8; i++) {
2339 if (OperandIdx[i] < 0)
2340 return Node;
2341 SDValue &Src = Ops[OperandIdx[i] - 1];
2342 SDValue &Neg = Ops[NegIdx[i] - 1];
2343 SDValue &Abs = Ops[AbsIdx[i] - 1];
2344 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2345 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2346 if (HasDst)
2347 SelIdx--;
2348 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002349 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2350 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2351 }
2352 } else if (Opcode == AMDGPU::REG_SEQUENCE) {
2353 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2354 SDValue &Src = Ops[i];
2355 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002356 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2357 }
Vincent Lejeune0167a312013-09-12 23:45:00 +00002358 } else if (Opcode == AMDGPU::CLAMP_R600) {
2359 SDValue Src = Node->getOperand(0);
2360 if (!Src.isMachineOpcode() ||
2361 !TII->hasInstrModifiers(Src.getMachineOpcode()))
2362 return Node;
2363 int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
2364 AMDGPU::OpName::clamp);
2365 if (ClampIdx < 0)
2366 return Node;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002367 SDLoc DL(Node);
Benjamin Kramer6cd780f2015-02-17 15:29:18 +00002368 std::vector<SDValue> Ops(Src->op_begin(), Src->op_end());
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002369 Ops[ClampIdx - 1] = DAG.getTargetConstant(1, DL, MVT::i32);
2370 return DAG.getMachineNode(Src.getMachineOpcode(), DL,
2371 Node->getVTList(), Ops);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002372 } else {
2373 if (!TII->hasInstrModifiers(Opcode))
2374 return Node;
2375 int OperandIdx[] = {
2376 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2377 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2378 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
2379 };
2380 int NegIdx[] = {
2381 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
2382 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
2383 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
2384 };
2385 int AbsIdx[] = {
2386 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
2387 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
2388 -1
2389 };
2390 for (unsigned i = 0; i < 3; i++) {
2391 if (OperandIdx[i] < 0)
2392 return Node;
2393 SDValue &Src = Ops[OperandIdx[i] - 1];
2394 SDValue &Neg = Ops[NegIdx[i] - 1];
2395 SDValue FakeAbs;
2396 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2397 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2398 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002399 int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
2400 if (HasDst) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002401 SelIdx--;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002402 ImmIdx--;
2403 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002404 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002405 SDValue &Imm = Ops[ImmIdx];
2406 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002407 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2408 }
2409 }
2410
2411 return Node;
2412}