blob: 5ad827e88649386ed37319744cca33a08d7e59f6 [file] [log] [blame]
//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000016#include "AMDGPUFrameLowering.h"
Matt Arsenaultc791f392014-06-23 18:00:31 +000017#include "AMDGPUIntrinsicInfo.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000018#include "AMDGPUSubtarget.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000019#include "R600Defines.h"
20#include "R600InstrInfo.h"
21#include "R600MachineFunctionInfo.h"
Tom Stellard067c8152014-07-21 14:01:14 +000022#include "llvm/Analysis/ValueTracking.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000023#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000024#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000025#include "llvm/CodeGen/MachineInstrBuilder.h"
26#include "llvm/CodeGen/MachineRegisterInfo.h"
27#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000028#include "llvm/IR/Argument.h"
29#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000030
31using namespace llvm;
32
/// Construct the R600 lowering object: register the legal register classes,
/// then declare, for each (node, type) combination, how SelectionDAG
/// legalization must treat it on this family (Legal / Expand / Custom).
R600TargetLowering::R600TargetLowering(TargetMachine &TM,
                                       const AMDGPUSubtarget &STI)
    : AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) {
  // Scalars live in 32-bit registers; 2- and 4-element vectors in the
  // 64- and 128-bit register classes respectively.
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
  addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);

  computeRegisterProperties(STI.getRegisterInfo());

  // Set condition code actions.  The condition codes listed here have no
  // direct hardware encoding and are rewritten in terms of supported ones.
  setCondCodeAction(ISD::SETO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::f32, Expand);

  setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::i32, Expand);

  // Trig is custom-lowered (see LowerTrig) rather than expanded.
  setOperationAction(ISD::FCOS, MVT::f32, Custom);
  setOperationAction(ISD::FSIN, MVT::f32, Custom);

  setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
  setOperationAction(ISD::SETCC, MVT::v2i32, Expand);

  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  // Plain SELECT is expanded into SELECT_CC (custom-lowered above).
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
  setOperationAction(ISD::SELECT, MVT::v4i32, Expand);

  // ADD, SUB overflow.
  // TODO: turn these into Legal?
  if (Subtarget->hasCARRY())
    setOperationAction(ISD::UADDO, MVT::i32, Custom);

  if (Subtarget->hasBORROW())
    setOperationAction(ISD::USUBO, MVT::i32, Custom);

  // Expand sign extension of vectors.  Scalar SIGN_EXTEND_INREG is only
  // expanded when the subtarget lacks the BFE (bit-field extract)
  // instruction; vector forms are always expanded.
  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);


  // Legalize loads and stores to the private address space.
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);

  // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
  // spaces, so it is custom lowered to handle those where it isn't.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
  }

  setOperationAction(ISD::STORE, MVT::i8, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);
  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
  setTruncStoreAction(MVT::i32, MVT::i16, Custom);

  // NOTE(review): the i32/v4i32 LOAD actions below repeat the ones set
  // above; harmless, but redundant.
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);

  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);

  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

  // Target-specific DAG combines (see PerformDAGCombine).
  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::FP_TO_SINT);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::SELECT_CC);
  setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);

  // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
  // to be Legal/Custom in order to avoid library calls.
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);

  // No add/sub with carry-in/carry-out; expand them for both scalar widths.
  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
  for (MVT VT : ScalarIntVTs) {
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::SUBE, VT, Expand);
  }

  setSchedulingPreference(Sched::Source);
}
192
Tom Stellardc0f0fba2015-10-01 17:51:29 +0000193static inline bool isEOP(MachineBasicBlock::iterator I) {
194 return std::next(I)->getOpcode() == AMDGPU::RETURN;
195}
196
/// Expand pseudo-instructions that were marked usesCustomInserter.
///
/// For each handled opcode the pseudo \p MI is replaced by one or more real
/// machine instructions inserted before it in \p BB; the pseudo itself is
/// erased at the bottom of the function.  Cases that keep the original
/// instruction (e.g. an export that is not last of its type) return \p BB
/// early, before the erase.
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(Subtarget->getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // Replace LDS_*_RET instruction that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI->getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
      // LDS_1A2D support and remove this special case.
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()) ||
          MI->getOpcode() == AMDGPU::LDS_CMPST_RET)
        return BB;

      // Build the NORET form, copying every operand except the dead dst.
      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  // CLAMP/FABS/FNEG are emitted as a MOV carrying the corresponding
  // instruction-flag bit; the hardware applies the modifier for free.
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    // Does not emit a new instruction: flags the defining instruction of the
    // masked register instead.
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    // FP immediates are moved via their raw bit pattern.
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::MOV_IMM_GLOBAL_ADDR: {
    //TODO: Perhaps combine this instruction with the next if possible
    auto MIB = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
                                            MI->getOperand(0).getReg(),
                                            AMDGPU::ALU_LITERAL_X);
    int Idx = TII->getOperandIdx(*MIB, AMDGPU::OpName::literal);
    //TODO: Ugh this is rather ugly
    // Overwrite the literal slot with the global-address operand.
    MIB->getOperand(Idx) = MI->getOperand(1);
    break;
  }
  case AMDGPU::CONST_COPY: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
                       MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(isEOP(I)); // Set End of program bit
    break;
  }
  case AMDGPU::RAT_STORE_TYPED_eg: {
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addOperand(MI->getOperand(2))
        .addImm(isEOP(I)); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    // Texture sample with user-supplied gradients: set H and V gradients
    // into two temp 128-bit registers, then issue the gradient sample.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    // Source swizzle (SrcX..SrcW) and coordinate-type flags (CTX..CTW);
    // adjusted per texture target below.
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
        .addOperand(MI->getOperand(3))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
        .addOperand(MI->getOperand(2))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW)
        // Implicit uses keep the gradient setup alive and ordered before
        // the sample.
        .addReg(T0, RegState::Implicit)
        .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    // Same as TXD but emits the comparison (shadow) variant of the sample.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
        .addOperand(MI->getOperand(3))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
        .addOperand(MI->getOperand(2))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW)
        .addReg(T0, RegState::Implicit)
        .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
        .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    // Materialize the f32 condition into PREDICATE_BIT via PRED_X, then
    // branch on the predicate.
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .addOperand(MI->getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    // Integer analogue of BRANCH_COND_f32 (uses the _INT compare opcode).
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO_INT)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .addOperand(MI->getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if its not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = isEOP(I);
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // CF instruction encoding differs between Evergreen (84) and R600 (40).
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addOperand(MI->getOperand(2))
        .addOperand(MI->getOperand(3))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5))
        .addOperand(MI->getOperand(6))
        .addImm(CfInst)
        .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  // The pseudo has been replaced; remove it.
  MI->eraseFromParent();
  return BB;
}
592
593//===----------------------------------------------------------------------===//
594// Custom DAG Lowering Operations
595//===----------------------------------------------------------------------===//
596
Tom Stellard75aadc22012-12-11 21:25:42 +0000597SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000598 MachineFunction &MF = DAG.getMachineFunction();
599 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard75aadc22012-12-11 21:25:42 +0000600 switch (Op.getOpcode()) {
601 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Tom Stellard880a80a2014-06-17 16:53:14 +0000602 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
603 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
Jan Vesely25f36272014-06-18 12:27:13 +0000604 case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
Jan Veselyecf51332014-06-18 12:27:17 +0000605 case ISD::SRA_PARTS:
Jan Vesely900ff2e2014-06-18 12:27:15 +0000606 case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
Jan Vesely808fff52015-04-30 17:15:56 +0000607 case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
608 case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000609 case ISD::FCOS:
610 case ISD::FSIN: return LowerTrig(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000611 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000612 case ISD::STORE: return LowerSTORE(Op, DAG);
Matt Arsenaultd2c9e082014-07-07 18:34:45 +0000613 case ISD::LOAD: {
614 SDValue Result = LowerLOAD(Op, DAG);
615 assert((!Result.getNode() ||
616 Result.getNode()->getNumValues() == 2) &&
617 "Load should return a value and a chain");
618 return Result;
619 }
620
Matt Arsenault1d555c42014-06-23 18:00:55 +0000621 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000622 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
Matt Arsenault81d06012016-03-07 21:10:13 +0000623 case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000624 case ISD::INTRINSIC_VOID: {
625 SDValue Chain = Op.getOperand(0);
626 unsigned IntrinsicID =
627 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
628 switch (IntrinsicID) {
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000629 case AMDGPUIntrinsic::R600_store_swizzle: {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000630 SDLoc DL(Op);
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000631 const SDValue Args[8] = {
632 Chain,
633 Op.getOperand(2), // Export Value
634 Op.getOperand(3), // ArrayBase
635 Op.getOperand(4), // Type
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000636 DAG.getConstant(0, DL, MVT::i32), // SWZ_X
637 DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
638 DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
639 DAG.getConstant(3, DL, MVT::i32) // SWZ_W
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000640 };
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000641 return DAG.getNode(AMDGPUISD::EXPORT, DL, Op.getValueType(), Args);
Tom Stellard75aadc22012-12-11 21:25:42 +0000642 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000643
Tom Stellard75aadc22012-12-11 21:25:42 +0000644 // default for switch(IntrinsicID)
645 default: break;
646 }
647 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
648 break;
649 }
650 case ISD::INTRINSIC_WO_CHAIN: {
651 unsigned IntrinsicID =
652 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
653 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000654 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000655 switch(IntrinsicID) {
656 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeunef143af32013-11-11 22:10:24 +0000657 case AMDGPUIntrinsic::R600_interp_xy:
658 case AMDGPUIntrinsic::R600_interp_zw: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000659 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Tom Stellard41afe6a2013-02-05 17:09:14 +0000660 MachineSDNode *interp;
Vincent Lejeunef143af32013-11-11 22:10:24 +0000661 SDValue RegisterINode = Op.getOperand(2);
662 SDValue RegisterJNode = Op.getOperand(3);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000663
Vincent Lejeunef143af32013-11-11 22:10:24 +0000664 if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
Tom Stellard41afe6a2013-02-05 17:09:14 +0000665 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000666 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000667 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000668 else
669 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000670 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000671 RegisterJNode, RegisterINode);
Ahmed Bougacha128f8732016-04-26 21:15:30 +0000672 return DAG.getBuildVector(MVT::v2f32, DL,
673 {SDValue(interp, 0), SDValue(interp, 1)});
Tom Stellard75aadc22012-12-11 21:25:42 +0000674 }
Matt Arsenault59bd3012016-01-22 19:00:09 +0000675 case AMDGPUIntrinsic::r600_tex:
676 case AMDGPUIntrinsic::r600_texc:
677 case AMDGPUIntrinsic::r600_txl:
678 case AMDGPUIntrinsic::r600_txlc:
679 case AMDGPUIntrinsic::r600_txb:
680 case AMDGPUIntrinsic::r600_txbc:
681 case AMDGPUIntrinsic::r600_txf:
682 case AMDGPUIntrinsic::r600_txq:
683 case AMDGPUIntrinsic::r600_ddx:
684 case AMDGPUIntrinsic::r600_ddy:
Vincent Lejeune6df39432013-10-02 16:00:33 +0000685 case AMDGPUIntrinsic::R600_ldptr: {
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000686 unsigned TextureOp;
687 switch (IntrinsicID) {
Matt Arsenault59bd3012016-01-22 19:00:09 +0000688 case AMDGPUIntrinsic::r600_tex:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000689 TextureOp = 0;
690 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000691 case AMDGPUIntrinsic::r600_texc:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000692 TextureOp = 1;
693 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000694 case AMDGPUIntrinsic::r600_txl:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000695 TextureOp = 2;
696 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000697 case AMDGPUIntrinsic::r600_txlc:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000698 TextureOp = 3;
699 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000700 case AMDGPUIntrinsic::r600_txb:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000701 TextureOp = 4;
702 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000703 case AMDGPUIntrinsic::r600_txbc:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000704 TextureOp = 5;
705 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000706 case AMDGPUIntrinsic::r600_txf:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000707 TextureOp = 6;
708 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000709 case AMDGPUIntrinsic::r600_txq:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000710 TextureOp = 7;
711 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000712 case AMDGPUIntrinsic::r600_ddx:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000713 TextureOp = 8;
714 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000715 case AMDGPUIntrinsic::r600_ddy:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000716 TextureOp = 9;
717 break;
Vincent Lejeune6df39432013-10-02 16:00:33 +0000718 case AMDGPUIntrinsic::R600_ldptr:
719 TextureOp = 10;
720 break;
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000721 default:
722 llvm_unreachable("Unknow Texture Operation");
723 }
724
725 SDValue TexArgs[19] = {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000726 DAG.getConstant(TextureOp, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000727 Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000728 DAG.getConstant(0, DL, MVT::i32),
729 DAG.getConstant(1, DL, MVT::i32),
730 DAG.getConstant(2, DL, MVT::i32),
731 DAG.getConstant(3, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000732 Op.getOperand(2),
733 Op.getOperand(3),
734 Op.getOperand(4),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000735 DAG.getConstant(0, DL, MVT::i32),
736 DAG.getConstant(1, DL, MVT::i32),
737 DAG.getConstant(2, DL, MVT::i32),
738 DAG.getConstant(3, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000739 Op.getOperand(5),
740 Op.getOperand(6),
741 Op.getOperand(7),
742 Op.getOperand(8),
743 Op.getOperand(9),
744 Op.getOperand(10)
745 };
Craig Topper48d114b2014-04-26 18:35:24 +0000746 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000747 }
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000748 case AMDGPUIntrinsic::AMDGPU_dp4: {
749 SDValue Args[8] = {
750 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000751 DAG.getConstant(0, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000752 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000753 DAG.getConstant(0, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000754 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000755 DAG.getConstant(1, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000756 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000757 DAG.getConstant(1, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000758 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000759 DAG.getConstant(2, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000760 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000761 DAG.getConstant(2, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000762 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000763 DAG.getConstant(3, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000764 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000765 DAG.getConstant(3, DL, MVT::i32))
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000766 };
Craig Topper48d114b2014-04-26 18:35:24 +0000767 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000768 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000769
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000770 case Intrinsic::r600_read_ngroups_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000771 return LowerImplicitParameter(DAG, VT, DL, 0);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000772 case Intrinsic::r600_read_ngroups_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000773 return LowerImplicitParameter(DAG, VT, DL, 1);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000774 case Intrinsic::r600_read_ngroups_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000775 return LowerImplicitParameter(DAG, VT, DL, 2);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000776 case Intrinsic::r600_read_global_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000777 return LowerImplicitParameter(DAG, VT, DL, 3);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000778 case Intrinsic::r600_read_global_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000779 return LowerImplicitParameter(DAG, VT, DL, 4);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000780 case Intrinsic::r600_read_global_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000781 return LowerImplicitParameter(DAG, VT, DL, 5);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000782 case Intrinsic::r600_read_local_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000783 return LowerImplicitParameter(DAG, VT, DL, 6);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000784 case Intrinsic::r600_read_local_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000785 return LowerImplicitParameter(DAG, VT, DL, 7);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000786 case Intrinsic::r600_read_local_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000787 return LowerImplicitParameter(DAG, VT, DL, 8);
788
Matt Arsenaultbef34e22016-01-22 21:30:34 +0000789 case Intrinsic::r600_read_workdim:
790 case AMDGPUIntrinsic::AMDGPU_read_workdim: { // Legacy name.
Tom Stellarddcb9f092015-07-09 21:20:37 +0000791 uint32_t ByteOffset = getImplicitParameterOffset(MFI, GRID_DIM);
792 return LowerImplicitParameter(DAG, VT, DL, ByteOffset / 4);
793 }
Jan Veselye5121f32014-10-14 20:05:26 +0000794
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000795 case Intrinsic::r600_read_tgid_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000796 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
797 AMDGPU::T1_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000798 case Intrinsic::r600_read_tgid_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000799 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
800 AMDGPU::T1_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000801 case Intrinsic::r600_read_tgid_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000802 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
803 AMDGPU::T1_Z, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000804 case Intrinsic::r600_read_tidig_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000805 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
806 AMDGPU::T0_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000807 case Intrinsic::r600_read_tidig_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000808 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
809 AMDGPU::T0_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000810 case Intrinsic::r600_read_tidig_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000811 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
812 AMDGPU::T0_Z, VT);
Matt Arsenaultbef34e22016-01-22 21:30:34 +0000813
814 // FIXME: Should be renamed to r600 prefix
Matt Arsenault0c3e2332016-01-26 04:14:16 +0000815 case AMDGPUIntrinsic::AMDGPU_rsq_clamped:
Matt Arsenault79963e82016-02-13 01:03:00 +0000816 return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
Matt Arsenaultbef34e22016-01-22 21:30:34 +0000817
818 case Intrinsic::r600_rsq:
Matt Arsenault0c3e2332016-01-26 04:14:16 +0000819 case AMDGPUIntrinsic::AMDGPU_rsq: // Legacy name
Matt Arsenault257d48d2014-06-24 22:13:39 +0000820 // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
821 return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000822 }
823 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
824 break;
825 }
826 } // end switch(Op.getOpcode())
827 return SDValue();
828}
829
/// Replace the illegal-typed results of node \p N with legal equivalents and
/// append them to \p Results. Handles the R600-specific cases; everything
/// else is delegated to the common AMDGPU implementation.
void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
    return;
  case ISD::FP_TO_UINT:
    // An i1 result only needs a float != 0.0 compare; see LowerFPTOUINT.
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
      return;
    }
    // Fall-through. Since we don't care about out of bounds values
    // we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint
    // considers some extra cases which are not necessary here.
  case ISD::FP_TO_SINT: {
    SDValue Result;
    // expandFP_TO_SINT returns false when no expansion was produced; in
    // that case nothing is pushed and default legalization takes over.
    if (expandFP_TO_SINT(N, Result, DAG))
      Results.push_back(Result);
    return;
  }
  case ISD::SDIVREM: {
    // NOTE(review): wraps result 1 of N before handing it to LowerSDIVREM —
    // presumably only the node itself matters there; confirm against
    // LowerSDIVREM's implementation.
    SDValue Op = SDValue(N, 1);
    SDValue RES = LowerSDIVREM(Op, DAG);
    // Push both the quotient and the remainder results.
    Results.push_back(RES);
    Results.push_back(RES.getValue(1));
    break;
  }
  case ISD::UDIVREM: {
    // LowerUDIVREM64 appends its expanded results directly to Results.
    SDValue Op = SDValue(N, 0);
    LowerUDIVREM64(Op, DAG, Results);
    break;
  }
  }
}
865
Tom Stellard880a80a2014-06-17 16:53:14 +0000866SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
867 SDValue Vector) const {
868
869 SDLoc DL(Vector);
870 EVT VecVT = Vector.getValueType();
871 EVT EltVT = VecVT.getVectorElementType();
872 SmallVector<SDValue, 8> Args;
873
874 for (unsigned i = 0, e = VecVT.getVectorNumElements();
875 i != e; ++i) {
Mehdi Amini44ede332015-07-09 02:09:04 +0000876 Args.push_back(DAG.getNode(
877 ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
878 DAG.getConstant(i, DL, getVectorIdxTy(DAG.getDataLayout()))));
Tom Stellard880a80a2014-06-17 16:53:14 +0000879 }
880
881 return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
882}
883
884SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
885 SelectionDAG &DAG) const {
886
887 SDLoc DL(Op);
888 SDValue Vector = Op.getOperand(0);
889 SDValue Index = Op.getOperand(1);
890
891 if (isa<ConstantSDNode>(Index) ||
892 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
893 return Op;
894
895 Vector = vectorToVerticalVector(DAG, Vector);
896 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
897 Vector, Index);
898}
899
900SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
901 SelectionDAG &DAG) const {
902 SDLoc DL(Op);
903 SDValue Vector = Op.getOperand(0);
904 SDValue Value = Op.getOperand(1);
905 SDValue Index = Op.getOperand(2);
906
907 if (isa<ConstantSDNode>(Index) ||
908 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
909 return Op;
910
911 Vector = vectorToVerticalVector(DAG, Vector);
912 SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
913 Vector, Value, Index);
914 return vectorToVerticalVector(DAG, Insert);
915}
916
Tom Stellard27233b72016-05-02 18:05:17 +0000917SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
918 SDValue Op,
919 SelectionDAG &DAG) const {
920
921 GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
922 if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
923 return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
924
925 const DataLayout &DL = DAG.getDataLayout();
926 const GlobalValue *GV = GSD->getGlobal();
Tom Stellard27233b72016-05-02 18:05:17 +0000927 MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
928
Jan Veselyf97de002016-05-13 20:39:29 +0000929 SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
930 return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
Tom Stellard27233b72016-05-02 18:05:17 +0000931}
932
/// Lower FSIN/FCOS to the hardware SIN_HW/COS_HW nodes, pre-scaling the
/// argument into the input range the hardware expects.
SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
  // On hw >= R700, COS/SIN input must be between -1. and 1.
  // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
  EVT VT = Op.getValueType();
  SDValue Arg = Op.getOperand(0);
  SDLoc DL(Op);

  // TODO: Should this propagate fast-math-flags?
  // 0.15915494309 == 1 / (2 * pi): maps Arg from radians to revolutions,
  // then FRACT of (rev + 0.5) wraps it into [0, 1).
  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT,
        DAG.getNode(ISD::FMUL, DL, VT, Arg,
          DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
        DAG.getConstantFP(0.5, DL, MVT::f32)));
  unsigned TrigNode;
  switch (Op.getOpcode()) {
  case ISD::FCOS:
    TrigNode = AMDGPUISD::COS_HW;
    break;
  case ISD::FSIN:
    TrigNode = AMDGPUISD::SIN_HW;
    break;
  default:
    llvm_unreachable("Wrong trig opcode");
  }
  // Re-center the wrapped value into [-0.5, 0.5) before the hardware op.
  SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT, FractPart,
        DAG.getConstantFP(-0.5, DL, MVT::f32)));
  if (Gen >= AMDGPUSubtarget::R700)
    return TrigVal;
  // On R600 hw, COS/SIN input must be between -Pi and Pi.
  // 3.14159265359 == pi: rescale the [-0.5, 0.5) value into [-Pi, Pi).
  return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
                     DAG.getConstantFP(3.14159265359, DL, MVT::f32));
}
966
/// Lower SHL_PARTS (shift-left of a double-wide value split into Lo/Hi
/// parts) into single-width ops plus selects on the shift amount.
SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);

  SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  // BigShift is used when Shift >= Width (the whole result comes from Lo);
  // CompShift is the complementary amount for the bits that cross parts.
  SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps, the alternative is to
  // add a conditional to filter the special case.

  // Bits of Lo that spill into Hi: srl by (Width1 - Shift), then by 1 more,
  // totalling (Width - Shift) without ever shifting by a full Width.
  SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
  Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);

  // Shift < Width: Hi gets its own shifted bits OR the spill from Lo.
  SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
  HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
  SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);

  // Shift >= Width: Hi is Lo shifted by the excess; Lo becomes zero.
  SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
  SDValue LoBig = Zero;

  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
}
1002
/// Lower SRL_PARTS/SRA_PARTS (right shift of a double-wide value split into
/// Lo/Hi parts). Mirrors LowerSHLParts; for SRA the high part is filled
/// with sign bits instead of zeros.
SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);

  const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;

  SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  // BigShift handles Shift >= Width; CompShift is the complementary amount
  // for the bits of Hi that drop down into Lo.
  SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps, the alternative is to
  // add a conditional to filter the special case.

  // Bits of Hi that drop into Lo: shl by (Width1 - Shift) then by 1 more,
  // totalling (Width - Shift) without a full-width shift.
  SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
  Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);

  // Shift < Width: Hi shifts arithmetically only for SRA_PARTS; Lo gets its
  // own shifted bits OR the spill from Hi.
  SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
  SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
  LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);

  // Shift >= Width: Lo is Hi shifted by the excess; Hi is all sign bits
  // (SRA) or zero (SRL).
  SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
  SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;

  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
}
1040
Jan Vesely808fff52015-04-30 17:15:56 +00001041SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
1042 unsigned mainop, unsigned ovf) const {
1043 SDLoc DL(Op);
1044 EVT VT = Op.getValueType();
1045
1046 SDValue Lo = Op.getOperand(0);
1047 SDValue Hi = Op.getOperand(1);
1048
1049 SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
1050 // Extend sign.
1051 OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
1052 DAG.getValueType(MVT::i1));
1053
1054 SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);
1055
1056 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
1057}
1058
Tom Stellard75aadc22012-12-11 21:25:42 +00001059SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001060 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001061 return DAG.getNode(
1062 ISD::SETCC,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001063 DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001064 MVT::i1,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001065 Op, DAG.getConstantFP(0.0f, DL, MVT::f32),
Tom Stellard75aadc22012-12-11 21:25:42 +00001066 DAG.getCondCode(ISD::SETNE)
1067 );
1068}
1069
/// Load an implicit kernel parameter (e.g. grid sizes) from constant
/// buffer 0 at dword slot \p DwordOffset.
SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   SDLoc DL,
                                                   unsigned DwordOffset) const {
  unsigned ByteOffset = DwordOffset * 4;
  // The pointer type only carries the address space; the address itself is
  // the raw byte offset below, with a null base pointer.
  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                      AMDGPUAS::CONSTANT_BUFFER_0);

  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)),
                     false, false, false, 0);
}
1085
Tom Stellard75aadc22012-12-11 21:25:42 +00001086bool R600TargetLowering::isZero(SDValue Op) const {
1087 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
1088 return Cst->isNullValue();
1089 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
1090 return CstFP->isZero();
1091 } else {
1092 return false;
1093 }
1094}
1095
Matt Arsenault6b6a2c32016-03-11 08:00:27 +00001096bool R600TargetLowering::isHWTrueValue(SDValue Op) const {
1097 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
1098 return CFP->isExactlyValue(1.0);
1099 }
1100 return isAllOnesConstant(Op);
1101}
1102
1103bool R600TargetLowering::isHWFalseValue(SDValue Op) const {
1104 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
1105 return CFP->getValueAPF().isZero();
1106 }
1107 return isNullConstant(Op);
1108}
1109
/// Lower SELECT_CC, trying in order: a min/max combine (f32 only), the
/// native SET* form (compare producing +/-1 / 1.0f / 0.0f), the native
/// CND* form (compare against zero), and finally a two-step expansion.
/// Operands and the condition code are canonicalized along the way.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // Float selects may fold to the legacy min/max nodes.
  if (VT == MVT::f32) {
    DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
    SDValue MinMax = CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
    if (MinMax)
      return MinMax;
  }

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand. If True/False
  // are reversed (HW-false selected on true), invert the condition —
  // directly if the inverse is legal, otherwise by also swapping LHS/RHS.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
     ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        // Inverting the condition also requires swapping True/False.
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    // CND* has no not-equal form; rewrite NE-style codes as their inverse
    // with True/False exchanged.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this for it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, DL, CompareVT);
    HWFalse = DAG.getConstant(0, DL, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
1251
/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
/// convert these pointers to a register index. Each register holds
/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
/// for indirect addressing.
SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
                                               unsigned StackWidth,
                                               SelectionDAG &DAG) const {
  // Shift amount per stack width: width 1 uses 4 bytes per index (>> 2),
  // width 2 uses 8 bytes (>> 3), width 4 uses the full 16 bytes (>> 4).
  unsigned SRLPad;
  switch(StackWidth) {
  case 1:
    SRLPad = 2;
    break;
  case 2:
    SRLPad = 3;
    break;
  case 4:
    SRLPad = 4;
    break;
  default: llvm_unreachable("Invalid stack width");
  }

  SDLoc DL(Ptr);
  return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
                     DAG.getConstant(SRLPad, DL, MVT::i32));
}
1278
/// Map element \p ElemIdx of a stack slot to a register channel and a
/// register-pointer increment, given how many of the 4 sub-registers
/// (\p StackWidth) are used for indirect addressing.
void R600TargetLowering::getStackAddress(unsigned StackWidth,
                                         unsigned ElemIdx,
                                         unsigned &Channel,
                                         unsigned &PtrIncr) const {
  switch (StackWidth) {
  default:
  case 1:
    // One element per register: channel is always 0; any element past the
    // first lives in the next register.
    Channel = 0;
    if (ElemIdx > 0) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 2:
    // Two elements per register: even/odd picks the channel.
    Channel = ElemIdx % 2;
    // NOTE(review): only ElemIdx == 2 bumps the pointer; ElemIdx == 3
    // yields PtrIncr = 0, which looks inconsistent with "ElemIdx >= 2".
    // Presumably callers never pass 3 here — confirm before changing.
    if (ElemIdx == 2) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 4:
    // Four elements per register: the element index is the channel.
    Channel = ElemIdx;
    PtrIncr = 0;
    break;
  }
}
1307
/// Lower a truncating (i8/i16) store to private memory as a
/// read-modify-write of the containing 32-bit register word:
/// load the dword, clear the destination byte/halfword lane, OR in the
/// shifted value, and store the dword back.
SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Store);

  // Lane mask for the stored element within the 32-bit word.
  unsigned Mask = 0;
  if (Store->getMemoryVT() == MVT::i8) {
    Mask = 0xff;
  } else if (Store->getMemoryVT() == MVT::i16) {
    Mask = 0xffff;
  }

  SDValue Chain = Store->getChain();
  SDValue BasePtr = Store->getBasePtr();
  EVT MemVT = Store->getMemoryVT();

  // Dword address of the containing word, and the current contents.
  SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, BasePtr,
                            DAG.getConstant(2, DL, MVT::i32));
  SDValue Dst = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
                            Chain, Ptr,
                            DAG.getTargetConstant(0, DL, MVT::i32));

  // Byte offset within the word, converted to a bit shift (x8).
  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, BasePtr,
                                DAG.getConstant(0x3, DL, MVT::i32));

  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
                                 DAG.getConstant(3, DL, MVT::i32));

  // Widen the value to i32, then keep only the low MemVT bits before
  // moving them into position.
  SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
                                  Store->getValue());

  SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);

  SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
                                     MaskedValue, ShiftAmt);

  // Clear the destination lane in the loaded word (DstMask = ~(Mask << n)).
  SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32,
                                DAG.getConstant(Mask, DL, MVT::i32),
                                ShiftAmt);
  DstMask = DAG.getNode(ISD::XOR, DL, MVT::i32, DstMask,
                        DAG.getConstant(0xffffffff, DL, MVT::i32));
  Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);

  // Merge the new lane and write the word back.
  SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
  return DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
                     Chain, Value, Ptr,
                     DAG.getTargetConstant(0, DL, MVT::i32));
}
1355
1356SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1357 if (SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG))
Tom Stellardfbab8272013-08-16 01:12:11 +00001358 return Result;
Tom Stellardfbab8272013-08-16 01:12:11 +00001359
Matt Arsenault95245662016-02-11 05:32:46 +00001360 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1361 unsigned AS = StoreNode->getAddressSpace();
1362 SDValue Value = StoreNode->getValue();
1363 EVT ValueVT = Value.getValueType();
1364
1365 if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
1366 ValueVT.isVector()) {
1367 return SplitVectorStore(Op, DAG);
1368 }
1369
1370 SDLoc DL(Op);
1371 SDValue Chain = StoreNode->getChain();
1372 SDValue Ptr = StoreNode->getBasePtr();
1373
1374 if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001375 if (StoreNode->isTruncatingStore()) {
1376 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001377 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001378 EVT MemVT = StoreNode->getMemoryVT();
1379 SDValue MaskConstant;
1380 if (MemVT == MVT::i8) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001381 MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001382 } else {
1383 assert(MemVT == MVT::i16);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001384 MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001385 }
1386 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001387 DAG.getConstant(2, DL, MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001388 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001389 DAG.getConstant(0x00000003, DL, VT));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001390 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1391 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001392 DAG.getConstant(3, DL, VT));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001393 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1394 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
1395 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1396 // vector instead.
1397 SDValue Src[4] = {
1398 ShiftedValue,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001399 DAG.getConstant(0, DL, MVT::i32),
1400 DAG.getConstant(0, DL, MVT::i32),
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001401 Mask
1402 };
Ahmed Bougacha128f8732016-04-26 21:15:30 +00001403 SDValue Input = DAG.getBuildVector(MVT::v4i32, DL, Src);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001404 SDValue Args[3] = { Chain, Input, DWordAddr };
1405 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
Craig Topper206fcd42014-04-26 19:29:41 +00001406 Op->getVTList(), Args, MemVT,
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001407 StoreNode->getMemOperand());
1408 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
Matt Arsenault95245662016-02-11 05:32:46 +00001409 ValueVT.bitsGE(MVT::i32)) {
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001410 // Convert pointer from byte address to dword address.
1411 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1412 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001413 Ptr, DAG.getConstant(2, DL, MVT::i32)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001414
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001415 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
Matt Arsenaulteaa3a7e2013-12-10 21:37:42 +00001416 llvm_unreachable("Truncated and indexed stores not supported yet");
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001417 } else {
1418 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1419 }
1420 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001421 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001422 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001423
Matt Arsenault95245662016-02-11 05:32:46 +00001424 if (AS != AMDGPUAS::PRIVATE_ADDRESS)
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001425 return SDValue();
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001426
Matt Arsenault95245662016-02-11 05:32:46 +00001427 EVT MemVT = StoreNode->getMemoryVT();
1428 if (MemVT.bitsLT(MVT::i32))
1429 return lowerPrivateTruncStore(StoreNode, DAG);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001430
Ahmed Bougachaf8dfb472016-02-09 22:54:12 +00001431 // Lowering for indirect addressing
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001432 const MachineFunction &MF = DAG.getMachineFunction();
Eric Christopher7792e322015-01-30 23:24:40 +00001433 const AMDGPUFrameLowering *TFL =
1434 static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001435 unsigned StackWidth = TFL->getStackWidth(MF);
1436
1437 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1438
1439 if (ValueVT.isVector()) {
1440 unsigned NumElemVT = ValueVT.getVectorNumElements();
1441 EVT ElemVT = ValueVT.getVectorElementType();
Craig Topper48d114b2014-04-26 18:35:24 +00001442 SmallVector<SDValue, 4> Stores(NumElemVT);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001443
1444 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1445 "vector width in load");
1446
1447 for (unsigned i = 0; i < NumElemVT; ++i) {
1448 unsigned Channel, PtrIncr;
1449 getStackAddress(StackWidth, i, Channel, PtrIncr);
1450 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001451 DAG.getConstant(PtrIncr, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001452 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001453 Value, DAG.getConstant(i, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001454
1455 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1456 Chain, Elem, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001457 DAG.getTargetConstant(Channel, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001458 }
Craig Topper48d114b2014-04-26 18:35:24 +00001459 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001460 } else {
1461 if (ValueVT == MVT::i8) {
1462 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1463 }
1464 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001465 DAG.getTargetConstant(0, DL, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001466 }
1467
1468 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001469}
1470
Tom Stellard365366f2013-01-23 02:09:06 +00001471// return (512 + (kc_bank << 12)
1472static int
1473ConstantAddressBlock(unsigned AddressSpace) {
1474 switch (AddressSpace) {
1475 case AMDGPUAS::CONSTANT_BUFFER_0:
1476 return 512;
1477 case AMDGPUAS::CONSTANT_BUFFER_1:
1478 return 512 + 4096;
1479 case AMDGPUAS::CONSTANT_BUFFER_2:
1480 return 512 + 4096 * 2;
1481 case AMDGPUAS::CONSTANT_BUFFER_3:
1482 return 512 + 4096 * 3;
1483 case AMDGPUAS::CONSTANT_BUFFER_4:
1484 return 512 + 4096 * 4;
1485 case AMDGPUAS::CONSTANT_BUFFER_5:
1486 return 512 + 4096 * 5;
1487 case AMDGPUAS::CONSTANT_BUFFER_6:
1488 return 512 + 4096 * 6;
1489 case AMDGPUAS::CONSTANT_BUFFER_7:
1490 return 512 + 4096 * 7;
1491 case AMDGPUAS::CONSTANT_BUFFER_8:
1492 return 512 + 4096 * 8;
1493 case AMDGPUAS::CONSTANT_BUFFER_9:
1494 return 512 + 4096 * 9;
1495 case AMDGPUAS::CONSTANT_BUFFER_10:
1496 return 512 + 4096 * 10;
1497 case AMDGPUAS::CONSTANT_BUFFER_11:
1498 return 512 + 4096 * 11;
1499 case AMDGPUAS::CONSTANT_BUFFER_12:
1500 return 512 + 4096 * 12;
1501 case AMDGPUAS::CONSTANT_BUFFER_13:
1502 return 512 + 4096 * 13;
1503 case AMDGPUAS::CONSTANT_BUFFER_14:
1504 return 512 + 4096 * 14;
1505 case AMDGPUAS::CONSTANT_BUFFER_15:
1506 return 512 + 4096 * 15;
1507 default:
1508 return -1;
1509 }
1510}
1511
Matt Arsenault6dfda962016-02-10 18:21:39 +00001512SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
1513 SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001514 SDLoc DL(Op);
Matt Arsenault6dfda962016-02-10 18:21:39 +00001515 LoadSDNode *Load = cast<LoadSDNode>(Op);
1516 ISD::LoadExtType ExtType = Load->getExtensionType();
1517 EVT MemVT = Load->getMemoryVT();
Tom Stellard365366f2013-01-23 02:09:06 +00001518
Matt Arsenault6dfda962016-02-10 18:21:39 +00001519 // <SI && AS=PRIVATE && EXTLOAD && size < 32bit,
1520 // register (2-)byte extract.
1521
1522 // Get Register holding the target.
1523 SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Load->getBasePtr(),
1524 DAG.getConstant(2, DL, MVT::i32));
1525 // Load the Register.
1526 SDValue Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(),
1527 Load->getChain(),
1528 Ptr,
1529 DAG.getTargetConstant(0, DL, MVT::i32),
1530 Op.getOperand(2));
1531
1532 // Get offset within the register.
1533 SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
1534 Load->getBasePtr(),
1535 DAG.getConstant(0x3, DL, MVT::i32));
1536
1537 // Bit offset of target byte (byteIdx * 8).
1538 SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1539 DAG.getConstant(3, DL, MVT::i32));
1540
1541 // Shift to the right.
1542 Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Ret, ShiftAmt);
1543
1544 // Eliminate the upper bits by setting them to ...
1545 EVT MemEltVT = MemVT.getScalarType();
1546
1547 // ... ones.
1548 if (ExtType == ISD::SEXTLOAD) {
1549 SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
1550
1551 SDValue Ops[] = {
1552 DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode),
1553 Load->getChain()
1554 };
1555
1556 return DAG.getMergeValues(Ops, DL);
1557 }
1558
1559 // ... or zeros.
1560 SDValue Ops[] = {
1561 DAG.getZeroExtendInReg(Ret, DL, MemEltVT),
1562 Load->getChain()
1563 };
1564
1565 return DAG.getMergeValues(Ops, DL);
1566}
1567
1568SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1569 LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
1570 unsigned AS = LoadNode->getAddressSpace();
1571 EVT MemVT = LoadNode->getMemoryVT();
1572 ISD::LoadExtType ExtType = LoadNode->getExtensionType();
1573
1574 if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
1575 ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
1576 return lowerPrivateExtLoad(Op, DAG);
1577 }
1578
1579 SDLoc DL(Op);
1580 EVT VT = Op.getValueType();
1581 SDValue Chain = LoadNode->getChain();
1582 SDValue Ptr = LoadNode->getBasePtr();
Tom Stellarde9373602014-01-22 19:24:14 +00001583
Tom Stellard35bb18c2013-08-26 15:06:04 +00001584 if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
1585 SDValue MergedValues[2] = {
Matt Arsenault9c499c32016-04-14 23:31:26 +00001586 scalarizeVectorLoad(LoadNode, DAG),
Tom Stellard35bb18c2013-08-26 15:06:04 +00001587 Chain
1588 };
Craig Topper64941d92014-04-27 19:20:57 +00001589 return DAG.getMergeValues(MergedValues, DL);
Tom Stellard35bb18c2013-08-26 15:06:04 +00001590 }
1591
Tom Stellard365366f2013-01-23 02:09:06 +00001592 int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
Matt Arsenault00a0d6f2013-11-13 02:39:07 +00001593 if (ConstantBlock > -1 &&
1594 ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
1595 (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
Tom Stellard365366f2013-01-23 02:09:06 +00001596 SDValue Result;
Nick Lewyckyaad475b2014-04-15 07:22:52 +00001597 if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
1598 isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
Matt Arsenaultef1a9502013-11-01 17:39:26 +00001599 isa<ConstantSDNode>(Ptr)) {
Tom Stellard365366f2013-01-23 02:09:06 +00001600 SDValue Slots[4];
1601 for (unsigned i = 0; i < 4; i++) {
1602 // We want Const position encoded with the following formula :
1603 // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
1604 // const_index is Ptr computed by llvm using an alignment of 16.
1605 // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
1606 // then div by 4 at the ISel step
1607 SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001608 DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
Tom Stellard365366f2013-01-23 02:09:06 +00001609 Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
1610 }
Tom Stellard0344cdf2013-08-01 15:23:42 +00001611 EVT NewVT = MVT::v4i32;
1612 unsigned NumElements = 4;
1613 if (VT.isVector()) {
1614 NewVT = VT;
1615 NumElements = VT.getVectorNumElements();
1616 }
Ahmed Bougacha128f8732016-04-26 21:15:30 +00001617 Result = DAG.getBuildVector(NewVT, DL, makeArrayRef(Slots, NumElements));
Tom Stellard365366f2013-01-23 02:09:06 +00001618 } else {
Alp Tokerf907b892013-12-05 05:44:44 +00001619 // non-constant ptr can't be folded, keeps it as a v4f32 load
Tom Stellard365366f2013-01-23 02:09:06 +00001620 Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001621 DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
1622 DAG.getConstant(4, DL, MVT::i32)),
1623 DAG.getConstant(LoadNode->getAddressSpace() -
1624 AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
Tom Stellard365366f2013-01-23 02:09:06 +00001625 );
1626 }
1627
1628 if (!VT.isVector()) {
1629 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001630 DAG.getConstant(0, DL, MVT::i32));
Tom Stellard365366f2013-01-23 02:09:06 +00001631 }
1632
1633 SDValue MergedValues[2] = {
Matt Arsenault7939acd2014-04-07 16:44:24 +00001634 Result,
1635 Chain
Tom Stellard365366f2013-01-23 02:09:06 +00001636 };
Craig Topper64941d92014-04-27 19:20:57 +00001637 return DAG.getMergeValues(MergedValues, DL);
Tom Stellard365366f2013-01-23 02:09:06 +00001638 }
1639
Matt Arsenault6dfda962016-02-10 18:21:39 +00001640 SDValue LoweredLoad;
1641
Matt Arsenault909d0c02013-10-30 23:43:29 +00001642 // For most operations returning SDValue() will result in the node being
1643 // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
1644 // need to manually expand loads that may be legal in some address spaces and
1645 // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
1646 // compute shaders, since the data is sign extended when it is uploaded to the
1647 // buffer. However SEXT loads from other address spaces are not supported, so
1648 // we need to expand them here.
Tom Stellard84021442013-07-23 01:48:24 +00001649 if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
1650 EVT MemVT = LoadNode->getMemoryVT();
1651 assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
Tom Stellard84021442013-07-23 01:48:24 +00001652 SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
1653 LoadNode->getPointerInfo(), MemVT,
1654 LoadNode->isVolatile(),
1655 LoadNode->isNonTemporal(),
Louis Gerbarg67474e32014-07-31 21:45:05 +00001656 LoadNode->isInvariant(),
Tom Stellard84021442013-07-23 01:48:24 +00001657 LoadNode->getAlignment());
Jan Veselyb670d372015-05-26 18:07:22 +00001658 SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
1659 DAG.getValueType(MemVT));
Tom Stellard84021442013-07-23 01:48:24 +00001660
Jan Veselyb670d372015-05-26 18:07:22 +00001661 SDValue MergedValues[2] = { Res, Chain };
Craig Topper64941d92014-04-27 19:20:57 +00001662 return DAG.getMergeValues(MergedValues, DL);
Tom Stellard84021442013-07-23 01:48:24 +00001663 }
1664
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001665 if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1666 return SDValue();
1667 }
1668
1669 // Lowering for indirect addressing
1670 const MachineFunction &MF = DAG.getMachineFunction();
Eric Christopher7792e322015-01-30 23:24:40 +00001671 const AMDGPUFrameLowering *TFL =
1672 static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001673 unsigned StackWidth = TFL->getStackWidth(MF);
1674
1675 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1676
1677 if (VT.isVector()) {
1678 unsigned NumElemVT = VT.getVectorNumElements();
1679 EVT ElemVT = VT.getVectorElementType();
1680 SDValue Loads[4];
1681
1682 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1683 "vector width in load");
1684
1685 for (unsigned i = 0; i < NumElemVT; ++i) {
1686 unsigned Channel, PtrIncr;
1687 getStackAddress(StackWidth, i, Channel, PtrIncr);
1688 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001689 DAG.getConstant(PtrIncr, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001690 Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
1691 Chain, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001692 DAG.getTargetConstant(Channel, DL, MVT::i32),
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001693 Op.getOperand(2));
1694 }
1695 for (unsigned i = NumElemVT; i < 4; ++i) {
1696 Loads[i] = DAG.getUNDEF(ElemVT);
1697 }
1698 EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
Ahmed Bougacha128f8732016-04-26 21:15:30 +00001699 LoweredLoad = DAG.getBuildVector(TargetVT, DL, Loads);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001700 } else {
1701 LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
1702 Chain, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001703 DAG.getTargetConstant(0, DL, MVT::i32), // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001704 Op.getOperand(2));
1705 }
1706
Matt Arsenault7939acd2014-04-07 16:44:24 +00001707 SDValue Ops[2] = {
1708 LoweredLoad,
1709 Chain
1710 };
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001711
Craig Topper64941d92014-04-27 19:20:57 +00001712 return DAG.getMergeValues(Ops, DL);
Tom Stellard365366f2013-01-23 02:09:06 +00001713}
Tom Stellard75aadc22012-12-11 21:25:42 +00001714
Matt Arsenault1d555c42014-06-23 18:00:55 +00001715SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1716 SDValue Chain = Op.getOperand(0);
1717 SDValue Cond = Op.getOperand(1);
1718 SDValue Jump = Op.getOperand(2);
1719
1720 return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1721 Chain, Jump, Cond);
1722}
1723
Matt Arsenault81d06012016-03-07 21:10:13 +00001724SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
1725 SelectionDAG &DAG) const {
1726 MachineFunction &MF = DAG.getMachineFunction();
1727 const AMDGPUFrameLowering *TFL = Subtarget->getFrameLowering();
1728
1729 FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
1730
1731 unsigned FrameIndex = FIN->getIndex();
1732 unsigned IgnoredFrameReg;
1733 unsigned Offset =
1734 TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
1735 return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), SDLoc(Op),
1736 Op.getValueType());
1737}
1738
Tom Stellard75aadc22012-12-11 21:25:42 +00001739/// XXX Only kernel functions are supported, so we can assume for now that
1740/// every function is a kernel function, but in the future we should use
1741/// separate calling conventions for kernel and non-kernel functions.
1742SDValue R600TargetLowering::LowerFormalArguments(
1743 SDValue Chain,
1744 CallingConv::ID CallConv,
1745 bool isVarArg,
1746 const SmallVectorImpl<ISD::InputArg> &Ins,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001747 SDLoc DL, SelectionDAG &DAG,
Tom Stellard75aadc22012-12-11 21:25:42 +00001748 SmallVectorImpl<SDValue> &InVals) const {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001749 SmallVector<CCValAssign, 16> ArgLocs;
Eric Christopherb5217502014-08-06 18:45:26 +00001750 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1751 *DAG.getContext());
Vincent Lejeunef143af32013-11-11 22:10:24 +00001752 MachineFunction &MF = DAG.getMachineFunction();
Jan Veselye5121f32014-10-14 20:05:26 +00001753 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellardacfeebf2013-07-23 01:48:05 +00001754
Tom Stellardaf775432013-10-23 00:44:32 +00001755 SmallVector<ISD::InputArg, 8> LocalIns;
1756
Matt Arsenault209a7b92014-04-18 07:40:20 +00001757 getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);
Tom Stellardaf775432013-10-23 00:44:32 +00001758
1759 AnalyzeFormalArguments(CCInfo, LocalIns);
Tom Stellardacfeebf2013-07-23 01:48:05 +00001760
Tom Stellard1e803092013-07-23 01:48:18 +00001761 for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001762 CCValAssign &VA = ArgLocs[i];
Matt Arsenault74ef2772014-08-13 18:14:11 +00001763 const ISD::InputArg &In = Ins[i];
1764 EVT VT = In.VT;
1765 EVT MemVT = VA.getLocVT();
1766 if (!VT.isVector() && MemVT.isVector()) {
1767 // Get load source type if scalarized.
1768 MemVT = MemVT.getVectorElementType();
1769 }
Tom Stellard78e01292013-07-23 01:47:58 +00001770
Nicolai Haehnledf3a20c2016-04-06 19:40:20 +00001771 if (AMDGPU::isShader(CallConv)) {
Vincent Lejeunef143af32013-11-11 22:10:24 +00001772 unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
1773 SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
1774 InVals.push_back(Register);
1775 continue;
1776 }
1777
Tom Stellard75aadc22012-12-11 21:25:42 +00001778 PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Matt Arsenault74ef2772014-08-13 18:14:11 +00001779 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellardacfeebf2013-07-23 01:48:05 +00001780
Matt Arsenaultfae02982014-03-17 18:58:11 +00001781 // i64 isn't a legal type, so the register type used ends up as i32, which
1782 // isn't expected here. It attempts to create this sextload, but it ends up
1783 // being invalid. Somehow this seems to work with i64 arguments, but breaks
1784 // for <1 x i64>.
1785
Tom Stellardacfeebf2013-07-23 01:48:05 +00001786 // The first 36 bytes of the input buffer contains information about
1787 // thread group and global sizes.
Matt Arsenault74ef2772014-08-13 18:14:11 +00001788 ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
1789 if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
1790 // FIXME: This should really check the extload type, but the handling of
1791 // extload vector parameters seems to be broken.
Matt Arsenaulte1f030c2014-04-11 20:59:54 +00001792
Matt Arsenault74ef2772014-08-13 18:14:11 +00001793 // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
1794 Ext = ISD::SEXTLOAD;
1795 }
1796
1797 // Compute the offset from the value.
1798 // XXX - I think PartOffset should give you this, but it seems to give the
1799 // size of the register which isn't useful.
1800
Andrew Trick05938a52015-02-16 18:10:47 +00001801 unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
Matt Arsenault74ef2772014-08-13 18:14:11 +00001802 unsigned PartOffset = VA.getLocMemOffset();
Jan Veselye5121f32014-10-14 20:05:26 +00001803 unsigned Offset = 36 + VA.getLocMemOffset();
Matt Arsenault74ef2772014-08-13 18:14:11 +00001804
1805 MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
1806 SDValue Arg = DAG.getLoad(ISD::UNINDEXED, Ext, VT, DL, Chain,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001807 DAG.getConstant(Offset, DL, MVT::i32),
Matt Arsenault74ef2772014-08-13 18:14:11 +00001808 DAG.getUNDEF(MVT::i32),
1809 PtrInfo,
1810 MemVT, false, true, true, 4);
Matt Arsenault209a7b92014-04-18 07:40:20 +00001811
1812 // 4 is the preferred alignment for the CONSTANT memory space.
Tom Stellard75aadc22012-12-11 21:25:42 +00001813 InVals.push_back(Arg);
Jan Veselye5121f32014-10-14 20:05:26 +00001814 MFI->ABIArgOffset = Offset + MemVT.getStoreSize();
Tom Stellard75aadc22012-12-11 21:25:42 +00001815 }
1816 return Chain;
1817}
1818
Mehdi Amini44ede332015-07-09 02:09:04 +00001819EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1820 EVT VT) const {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001821 if (!VT.isVector())
1822 return MVT::i32;
Tom Stellard75aadc22012-12-11 21:25:42 +00001823 return VT.changeVectorElementTypeToInteger();
1824}
1825
Matt Arsenaultfa67bdb2016-02-22 21:04:16 +00001826bool R600TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
1827 unsigned AddrSpace,
1828 unsigned Align,
1829 bool *IsFast) const {
1830 if (IsFast)
1831 *IsFast = false;
1832
1833 if (!VT.isSimple() || VT == MVT::Other)
1834 return false;
1835
1836 if (VT.bitsLT(MVT::i32))
1837 return false;
1838
1839 // TODO: This is a rough estimate.
1840 if (IsFast)
1841 *IsFast = true;
1842
1843 return VT.bitsGT(MVT::i32) && Align % 4 == 0;
1844}
1845
Matt Arsenault209a7b92014-04-18 07:40:20 +00001846static SDValue CompactSwizzlableVector(
1847 SelectionDAG &DAG, SDValue VectorEntry,
1848 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001849 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1850 assert(RemapSwizzle.empty());
1851 SDValue NewBldVec[4] = {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001852 VectorEntry.getOperand(0),
1853 VectorEntry.getOperand(1),
1854 VectorEntry.getOperand(2),
1855 VectorEntry.getOperand(3)
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001856 };
1857
1858 for (unsigned i = 0; i < 4; i++) {
Sanjay Patel57195842016-03-14 17:28:46 +00001859 if (NewBldVec[i].isUndef())
Vincent Lejeunefa58a5f2013-10-13 17:56:10 +00001860 // We mask write here to teach later passes that the ith element of this
1861 // vector is undef. Thus we can use it to reduce 128 bits reg usage,
1862 // break false dependencies and additionnaly make assembly easier to read.
1863 RemapSwizzle[i] = 7; // SEL_MASK_WRITE
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001864 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1865 if (C->isZero()) {
1866 RemapSwizzle[i] = 4; // SEL_0
1867 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1868 } else if (C->isExactlyValue(1.0)) {
1869 RemapSwizzle[i] = 5; // SEL_1
1870 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1871 }
1872 }
1873
Sanjay Patel57195842016-03-14 17:28:46 +00001874 if (NewBldVec[i].isUndef())
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001875 continue;
1876 for (unsigned j = 0; j < i; j++) {
1877 if (NewBldVec[i] == NewBldVec[j]) {
1878 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1879 RemapSwizzle[i] = j;
1880 break;
1881 }
1882 }
1883 }
1884
Ahmed Bougacha128f8732016-04-26 21:15:30 +00001885 return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
1886 NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001887}
1888
Benjamin Kramer193960c2013-06-11 13:32:25 +00001889static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1890 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001891 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1892 assert(RemapSwizzle.empty());
1893 SDValue NewBldVec[4] = {
1894 VectorEntry.getOperand(0),
1895 VectorEntry.getOperand(1),
1896 VectorEntry.getOperand(2),
1897 VectorEntry.getOperand(3)
1898 };
1899 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001900 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001901 RemapSwizzle[i] = i;
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001902 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1903 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1904 ->getZExtValue();
1905 if (i == Idx)
1906 isUnmovable[Idx] = true;
1907 }
1908 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001909
1910 for (unsigned i = 0; i < 4; i++) {
1911 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1912 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1913 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001914 if (isUnmovable[Idx])
1915 continue;
1916 // Swap i and Idx
1917 std::swap(NewBldVec[Idx], NewBldVec[i]);
1918 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1919 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001920 }
1921 }
1922
Ahmed Bougacha128f8732016-04-26 21:15:30 +00001923 return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
1924 NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001925}
1926
1927
1928SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001929 SDValue Swz[4], SelectionDAG &DAG,
1930 SDLoc DL) const {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001931 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1932 // Old -> New swizzle values
1933 DenseMap<unsigned, unsigned> SwizzleRemap;
1934
1935 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1936 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001937 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001938 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001939 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001940 }
1941
1942 SwizzleRemap.clear();
1943 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1944 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001945 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001946 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001947 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001948 }
1949
1950 return BuildVector;
1951}
1952
1953
Tom Stellard75aadc22012-12-11 21:25:42 +00001954//===----------------------------------------------------------------------===//
1955// Custom DAG Optimizations
1956//===----------------------------------------------------------------------===//
1957
1958SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1959 DAGCombinerInfo &DCI) const {
1960 SelectionDAG &DAG = DCI.DAG;
1961
1962 switch (N->getOpcode()) {
Tom Stellard50122a52014-04-07 19:45:41 +00001963 default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00001964 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1965 case ISD::FP_ROUND: {
1966 SDValue Arg = N->getOperand(0);
1967 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001968 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001969 Arg.getOperand(0));
1970 }
1971 break;
1972 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001973
1974 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1975 // (i32 select_cc f32, f32, -1, 0 cc)
1976 //
1977 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1978 // this to one of the SET*_DX10 instructions.
1979 case ISD::FP_TO_SINT: {
1980 SDValue FNeg = N->getOperand(0);
1981 if (FNeg.getOpcode() != ISD::FNEG) {
1982 return SDValue();
1983 }
1984 SDValue SelectCC = FNeg.getOperand(0);
1985 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1986 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1987 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1988 !isHWTrueValue(SelectCC.getOperand(2)) ||
1989 !isHWFalseValue(SelectCC.getOperand(3))) {
1990 return SDValue();
1991 }
1992
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001993 SDLoc dl(N);
1994 return DAG.getNode(ISD::SELECT_CC, dl, N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001995 SelectCC.getOperand(0), // LHS
1996 SelectCC.getOperand(1), // RHS
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001997 DAG.getConstant(-1, dl, MVT::i32), // True
1998 DAG.getConstant(0, dl, MVT::i32), // False
Tom Stellarde06163a2013-02-07 14:02:35 +00001999 SelectCC.getOperand(4)); // CC
2000
2001 break;
2002 }
Quentin Colombete2e05482013-07-30 00:27:16 +00002003
NAKAMURA Takumi8a046432013-10-28 04:07:38 +00002004 // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
2005 // => build_vector elt0, ... , NewEltIdx, ... , eltN
Quentin Colombete2e05482013-07-30 00:27:16 +00002006 case ISD::INSERT_VECTOR_ELT: {
2007 SDValue InVec = N->getOperand(0);
2008 SDValue InVal = N->getOperand(1);
2009 SDValue EltNo = N->getOperand(2);
2010 SDLoc dl(N);
2011
2012 // If the inserted element is an UNDEF, just use the input vector.
Sanjay Patel57195842016-03-14 17:28:46 +00002013 if (InVal.isUndef())
Quentin Colombete2e05482013-07-30 00:27:16 +00002014 return InVec;
2015
2016 EVT VT = InVec.getValueType();
2017
2018 // If we can't generate a legal BUILD_VECTOR, exit
2019 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
2020 return SDValue();
2021
2022 // Check that we know which element is being inserted
2023 if (!isa<ConstantSDNode>(EltNo))
2024 return SDValue();
2025 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
2026
2027 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
2028 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
2029 // vector elements.
2030 SmallVector<SDValue, 8> Ops;
2031 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
2032 Ops.append(InVec.getNode()->op_begin(),
2033 InVec.getNode()->op_end());
Sanjay Patel57195842016-03-14 17:28:46 +00002034 } else if (InVec.isUndef()) {
Quentin Colombete2e05482013-07-30 00:27:16 +00002035 unsigned NElts = VT.getVectorNumElements();
2036 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
2037 } else {
2038 return SDValue();
2039 }
2040
2041 // Insert the element
2042 if (Elt < Ops.size()) {
2043 // All the operands of BUILD_VECTOR must have the same type;
2044 // we enforce that here.
2045 EVT OpVT = Ops[0].getValueType();
2046 if (InVal.getValueType() != OpVT)
2047 InVal = OpVT.bitsGT(InVal.getValueType()) ?
2048 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
2049 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
2050 Ops[Elt] = InVal;
2051 }
2052
2053 // Return the new vector
Ahmed Bougacha128f8732016-04-26 21:15:30 +00002054 return DAG.getBuildVector(VT, dl, Ops);
Quentin Colombete2e05482013-07-30 00:27:16 +00002055 }
2056
Tom Stellard365366f2013-01-23 02:09:06 +00002057 // Extract_vec (Build_vector) generated by custom lowering
2058 // also needs to be customly combined
2059 case ISD::EXTRACT_VECTOR_ELT: {
2060 SDValue Arg = N->getOperand(0);
2061 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
2062 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2063 unsigned Element = Const->getZExtValue();
2064 return Arg->getOperand(Element);
2065 }
2066 }
Tom Stellarddd04c832013-01-31 22:11:53 +00002067 if (Arg.getOpcode() == ISD::BITCAST &&
2068 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
2069 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2070 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00002071 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00002072 Arg->getOperand(0).getOperand(Element));
2073 }
2074 }
Mehdi Aminie029eae2015-07-16 06:23:12 +00002075 break;
Tom Stellard365366f2013-01-23 02:09:06 +00002076 }
Tom Stellarde06163a2013-02-07 14:02:35 +00002077
2078 case ISD::SELECT_CC: {
Tom Stellardafa8b532014-05-09 16:42:16 +00002079 // Try common optimizations
Ahmed Bougachaf8dfb472016-02-09 22:54:12 +00002080 if (SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI))
Tom Stellardafa8b532014-05-09 16:42:16 +00002081 return Ret;
2082
Tom Stellarde06163a2013-02-07 14:02:35 +00002083 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
2084 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00002085 //
2086 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
2087 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00002088 SDValue LHS = N->getOperand(0);
2089 if (LHS.getOpcode() != ISD::SELECT_CC) {
2090 return SDValue();
2091 }
2092
2093 SDValue RHS = N->getOperand(1);
2094 SDValue True = N->getOperand(2);
2095 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00002096 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00002097
2098 if (LHS.getOperand(2).getNode() != True.getNode() ||
2099 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00002100 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00002101 return SDValue();
2102 }
2103
Tom Stellard5e524892013-03-08 15:37:11 +00002104 switch (NCC) {
2105 default: return SDValue();
2106 case ISD::SETNE: return LHS;
2107 case ISD::SETEQ: {
2108 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
2109 LHSCC = ISD::getSetCCInverse(LHSCC,
2110 LHS.getOperand(0).getValueType().isInteger());
Tom Stellardcd428182013-09-28 02:50:38 +00002111 if (DCI.isBeforeLegalizeOps() ||
2112 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
2113 return DAG.getSelectCC(SDLoc(N),
2114 LHS.getOperand(0),
2115 LHS.getOperand(1),
2116 LHS.getOperand(2),
2117 LHS.getOperand(3),
2118 LHSCC);
2119 break;
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002120 }
Tom Stellard5e524892013-03-08 15:37:11 +00002121 }
Tom Stellardcd428182013-09-28 02:50:38 +00002122 return SDValue();
Tom Stellard5e524892013-03-08 15:37:11 +00002123 }
Tom Stellardfbab8272013-08-16 01:12:11 +00002124
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002125 case AMDGPUISD::EXPORT: {
2126 SDValue Arg = N->getOperand(1);
2127 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2128 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002129
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002130 SDValue NewArgs[8] = {
2131 N->getOperand(0), // Chain
2132 SDValue(),
2133 N->getOperand(2), // ArrayBase
2134 N->getOperand(3), // Type
2135 N->getOperand(4), // SWZ_X
2136 N->getOperand(5), // SWZ_Y
2137 N->getOperand(6), // SWZ_Z
2138 N->getOperand(7) // SWZ_W
2139 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00002140 SDLoc DL(N);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002141 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
Craig Topper48d114b2014-04-26 18:35:24 +00002142 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
Tom Stellarde06163a2013-02-07 14:02:35 +00002143 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002144 case AMDGPUISD::TEXTURE_FETCH: {
2145 SDValue Arg = N->getOperand(1);
2146 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2147 break;
2148
2149 SDValue NewArgs[19] = {
2150 N->getOperand(0),
2151 N->getOperand(1),
2152 N->getOperand(2),
2153 N->getOperand(3),
2154 N->getOperand(4),
2155 N->getOperand(5),
2156 N->getOperand(6),
2157 N->getOperand(7),
2158 N->getOperand(8),
2159 N->getOperand(9),
2160 N->getOperand(10),
2161 N->getOperand(11),
2162 N->getOperand(12),
2163 N->getOperand(13),
2164 N->getOperand(14),
2165 N->getOperand(15),
2166 N->getOperand(16),
2167 N->getOperand(17),
2168 N->getOperand(18),
2169 };
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002170 SDLoc DL(N);
2171 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
2172 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002173 }
Tom Stellard75aadc22012-12-11 21:25:42 +00002174 }
Matt Arsenault5565f65e2014-05-22 18:09:07 +00002175
2176 return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00002177}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002178
2179static bool
2180FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002181 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Eric Christopherfc6de422014-08-05 02:39:49 +00002182 const R600InstrInfo *TII =
2183 static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002184 if (!Src.isMachineOpcode())
2185 return false;
2186 switch (Src.getMachineOpcode()) {
2187 case AMDGPU::FNEG_R600:
2188 if (!Neg.getNode())
2189 return false;
2190 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002191 Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002192 return true;
2193 case AMDGPU::FABS_R600:
2194 if (!Abs.getNode())
2195 return false;
2196 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002197 Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002198 return true;
2199 case AMDGPU::CONST_COPY: {
2200 unsigned Opcode = ParentNode->getMachineOpcode();
2201 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2202
2203 if (!Sel.getNode())
2204 return false;
2205
2206 SDValue CstOffset = Src.getOperand(0);
2207 if (ParentNode->getValueType(0).isVector())
2208 return false;
2209
2210 // Gather constants values
2211 int SrcIndices[] = {
2212 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2213 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2214 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
2215 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2216 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2217 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2218 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2219 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2220 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2221 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2222 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
2223 };
2224 std::vector<unsigned> Consts;
Matt Arsenault4d64f962014-05-12 19:23:21 +00002225 for (int OtherSrcIdx : SrcIndices) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002226 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2227 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2228 continue;
2229 if (HasDst) {
2230 OtherSrcIdx--;
2231 OtherSelIdx--;
2232 }
2233 if (RegisterSDNode *Reg =
2234 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2235 if (Reg->getReg() == AMDGPU::ALU_CONST) {
Matt Arsenaultb3ee3882014-05-12 19:26:38 +00002236 ConstantSDNode *Cst
2237 = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002238 Consts.push_back(Cst->getZExtValue());
2239 }
2240 }
2241 }
2242
Matt Arsenault37c12d72014-05-12 20:42:57 +00002243 ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002244 Consts.push_back(Cst->getZExtValue());
2245 if (!TII->fitsConstReadLimitations(Consts)) {
2246 return false;
2247 }
2248
2249 Sel = CstOffset;
2250 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
2251 return true;
2252 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002253 case AMDGPU::MOV_IMM_I32:
2254 case AMDGPU::MOV_IMM_F32: {
2255 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
2256 uint64_t ImmValue = 0;
2257
2258
2259 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
2260 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
2261 float FloatValue = FPC->getValueAPF().convertToFloat();
2262 if (FloatValue == 0.0) {
2263 ImmReg = AMDGPU::ZERO;
2264 } else if (FloatValue == 0.5) {
2265 ImmReg = AMDGPU::HALF;
2266 } else if (FloatValue == 1.0) {
2267 ImmReg = AMDGPU::ONE;
2268 } else {
2269 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2270 }
2271 } else {
2272 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
2273 uint64_t Value = C->getZExtValue();
2274 if (Value == 0) {
2275 ImmReg = AMDGPU::ZERO;
2276 } else if (Value == 1) {
2277 ImmReg = AMDGPU::ONE_INT;
2278 } else {
2279 ImmValue = Value;
2280 }
2281 }
2282
2283 // Check that we aren't already using an immediate.
2284 // XXX: It's possible for an instruction to have more than one
2285 // immediate operand, but this is not supported yet.
2286 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
2287 if (!Imm.getNode())
2288 return false;
2289 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
2290 assert(C);
2291 if (C->getZExtValue())
2292 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002293 Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002294 }
2295 Src = DAG.getRegister(ImmReg, MVT::i32);
2296 return true;
2297 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002298 default:
2299 return false;
2300 }
2301}
2302
2303
/// \brief Fold the instructions after selecting them.
///
/// Walks the operands of the just-selected machine node \p Node and, via
/// FoldOperand(), absorbs FNEG/FABS/CONST_COPY/MOV_IMM_* feeder nodes into
/// the node's own neg/abs/sel/literal operand slots. Returns a replacement
/// node on the first successful fold, or \p Node unchanged otherwise.
SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
                                            SelectionDAG &DAG) const {
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
  if (!Node->isMachineOpcode())
    return Node;
  unsigned Opcode = Node->getMachineOpcode();
  // Null SDValue passed to FoldOperand for modifier slots the instruction
  // does not have; FoldOperand rejects folds that would need them.
  SDValue FakeOp;

  // Mutable copy of the operand list; FoldOperand rewrites entries in place
  // through the references taken below.
  std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());

  if (Opcode == AMDGPU::DOT_4) {
    // DOT_4 has eight sources (src0/src1 x XYZW), each with its own
    // neg/abs/sel modifier operands.
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
    };
    for (unsigned i = 0; i < 8; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      // Named-operand indices include the dst operand; the "- 1" (and the
      // HasDst adjustment below) converts them to use-operand indices.
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue &Abs = Ops[AbsIdx[i] - 1];
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      if (HasDst)
        SelIdx--;
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::REG_SEQUENCE) {
    // REG_SEQUENCE operands alternate (value, subreg-index) starting at 1;
    // only the value operands are candidates, and they have no modifiers.
    for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
      SDValue &Src = Ops[i];
      if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::CLAMP_R600) {
    // Fold CLAMP_R600 into its source by setting the source's clamp bit,
    // when the source instruction supports output modifiers.
    SDValue Src = Node->getOperand(0);
    if (!Src.isMachineOpcode() ||
        !TII->hasInstrModifiers(Src.getMachineOpcode()))
      return Node;
    int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
        AMDGPU::OpName::clamp);
    if (ClampIdx < 0)
      return Node;
    SDLoc DL(Node);
    // Note: this local Ops (the source's operands) intentionally shadows the
    // outer Ops — the re-emitted node is the source, not the CLAMP.
    std::vector<SDValue> Ops(Src->op_begin(), Src->op_end());
    Ops[ClampIdx - 1] = DAG.getTargetConstant(1, DL, MVT::i32);
    return DAG.getMachineNode(Src.getMachineOpcode(), DL,
                              Node->getVTList(), Ops);
  } else {
    // Generic ALU instruction: up to three sources, each with neg (and for
    // src0/src1 abs) modifiers, plus a shared literal slot.
    if (!TII->hasInstrModifiers(Opcode))
      return Node;
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
      -1  // src2 has no abs modifier
    };
    for (unsigned i = 0; i < 3; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue FakeAbs;
      SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
      if (HasDst) {
        // Convert named-operand indices (which count dst) to use indices.
        SelIdx--;
        ImmIdx--;
      }
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      SDValue &Imm = Ops[ImmIdx];
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  }

  // No fold applied.
  return Node;
}