blob: c7386118be95bc748f774024837d8c1837a0ef7e [file] [log] [blame]
Tom Stellard75aadc22012-12-11 21:25:42 +00001//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Custom DAG lowering for R600
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000016#include "AMDGPUFrameLowering.h"
Matt Arsenaultc791f392014-06-23 18:00:31 +000017#include "AMDGPUIntrinsicInfo.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000018#include "AMDGPUSubtarget.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000019#include "R600Defines.h"
20#include "R600InstrInfo.h"
21#include "R600MachineFunctionInfo.h"
Tom Stellard067c8152014-07-21 14:01:14 +000022#include "llvm/Analysis/ValueTracking.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000023#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000024#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000025#include "llvm/CodeGen/MachineInstrBuilder.h"
26#include "llvm/CodeGen/MachineRegisterInfo.h"
27#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000028#include "llvm/IR/Argument.h"
29#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000030
31using namespace llvm;
32
/// Constructor: configures how the SelectionDAG legalizer treats each
/// (opcode, type) pair for the R600 target — which operations are Legal,
/// which must be Expanded into simpler ops, and which get Custom lowering
/// in this file.
R600TargetLowering::R600TargetLowering(TargetMachine &TM,
                                       const AMDGPUSubtarget &STI)
    : AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) {
  // Register classes: 32-bit scalars live in Reg32, 2-element vectors in
  // the 64-bit Reg64 class, 4-element vectors in the 128-bit Reg128 class.
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
  addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);

  computeRegisterProperties(STI.getRegisterInfo());

  // Set condition code actions: condition codes the hardware cannot encode
  // directly are expanded into supported ones.
  setCondCodeAction(ISD::SETO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::f32, Expand);

  setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::i32, Expand);

  // sin/cos need custom lowering (see LowerTrig in LowerOperation below).
  setOperationAction(ISD::FCOS, MVT::f32, Custom);
  setOperationAction(ISD::FSIN, MVT::f32, Custom);

  setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
  setOperationAction(ISD::SETCC, MVT::v2i32, Expand);

  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
  setOperationAction(ISD::SELECT, MVT::v4i32, Expand);

  // Expand sign extension of vectors
  // Scalar SIGN_EXTEND_INREG is only cheap when the subtarget has the BFE
  // (bit-field extract) instruction; otherwise expand it.
  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);


  // Legalize loads and stores to the private address space.
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);

  // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
  // spaces, so it is custom lowered to handle those where it isn't.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
  }

  setOperationAction(ISD::STORE, MVT::i8, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);
  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
  setTruncStoreAction(MVT::i32, MVT::i16, Custom);

  // NOTE(review): the i32/v4i32 LOAD actions below repeat settings made
  // above — harmless (same action), but presumably left over from a merge.
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);

  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);

  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

  // DAG combines handled in PerformDAGCombine for these node kinds.
  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::FP_TO_SINT);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::SELECT_CC);
  setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);

  setOperationAction(ISD::SUB, MVT::i64, Expand);

  // These should be replaced by UDVIREM, but it does not happen automatically
  // during Type Legalization
  setOperationAction(ISD::UDIV, MVT::i64, Custom);
  setOperationAction(ISD::UREM, MVT::i64, Custom);
  setOperationAction(ISD::SDIV, MVT::i64, Custom);
  setOperationAction(ISD::SREM, MVT::i64, Custom);

  // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
  // to be Legal/Custom in order to avoid library calls.
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);

  // No carry-using add/sub: expand the carry forms for both scalar widths.
  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
  for (MVT VT : ScalarIntVTs) {
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::SUBE, VT, Expand);
  }

  setSchedulingPreference(Sched::Source);
}
193
/// Expand pseudo-instructions that were marked usesCustomInserter into real
/// machine instructions after instruction selection.
///
/// \param MI the pseudo-instruction to expand (erased on the common path).
/// \param BB the block containing \p MI.
/// \returns the block in which subsequent instructions should be inserted.
///
/// Cases that return \p BB early deliberately skip the eraseFromParent() at
/// the bottom and leave \p MI in place.
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(Subtarget->getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // Replace LDS_*_RET instruction that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI->getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
      // LDS_1A2D support and remove this special case.
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()) ||
          MI->getOpcode() == AMDGPU::LDS_CMPST_RET)
        return BB;

      // Copy every operand except the (dead) destination onto the NORET form.
      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  // CLAMP/FABS/FNEG pseudos all become a MOV carrying the matching
  // instruction-modifier flag.
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    // Mark the defining instruction of the masked register instead of
    // emitting anything here.
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    // Materialize the float immediate via its raw bit pattern.
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    // MOV from the constant file; the constant slot goes in src0_sel.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
                       MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    // If the next instruction is the function's RETURN, fold the
    // end-of-program bit into this write.
    unsigned EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    // Texture sample with derivatives: load the H and V gradients into two
    // temps, then issue the gradient sample with both as implicit uses.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    // Adjust source swizzle / coordinate-type bits per texture kind:
    // Rect targets use unnormalized X/Y, shadow targets move the compare
    // value into W, array targets carry the layer unnormalized.
    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            // Implicit uses keep the gradient setup alive and ordered
            // before the sample.
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    // Same as TXD but emits the compare (shadow) variant of the sample.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    // Predicate on the f32 condition, then emit a predicated jump that
    // kills PREDICATE_BIT.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    // Same as above with the integer compare opcode.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO_INT)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if its not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // CF instruction encoding differs between Evergreen (84) and R600 (40);
    // presumably these are the CF_INST opcode field values — confirm against
    // the ISA docs if touching this.
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  // Common path: the pseudo has been replaced, remove it.
  MI->eraseFromParent();
  return BB;
}
573
574//===----------------------------------------------------------------------===//
575// Custom DAG Lowering Operations
576//===----------------------------------------------------------------------===//
577
Tom Stellard75aadc22012-12-11 21:25:42 +0000578SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000579 MachineFunction &MF = DAG.getMachineFunction();
580 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard75aadc22012-12-11 21:25:42 +0000581 switch (Op.getOpcode()) {
582 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Tom Stellard880a80a2014-06-17 16:53:14 +0000583 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
584 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
Jan Vesely25f36272014-06-18 12:27:13 +0000585 case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
Jan Veselyecf51332014-06-18 12:27:17 +0000586 case ISD::SRA_PARTS:
Jan Vesely900ff2e2014-06-18 12:27:15 +0000587 case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000588 case ISD::FCOS:
589 case ISD::FSIN: return LowerTrig(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000590 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000591 case ISD::STORE: return LowerSTORE(Op, DAG);
Matt Arsenaultd2c9e082014-07-07 18:34:45 +0000592 case ISD::LOAD: {
593 SDValue Result = LowerLOAD(Op, DAG);
594 assert((!Result.getNode() ||
595 Result.getNode()->getNumValues() == 2) &&
596 "Load should return a value and a chain");
597 return Result;
598 }
599
Matt Arsenault1d555c42014-06-23 18:00:55 +0000600 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000601 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000602 case ISD::INTRINSIC_VOID: {
603 SDValue Chain = Op.getOperand(0);
604 unsigned IntrinsicID =
605 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
606 switch (IntrinsicID) {
607 case AMDGPUIntrinsic::AMDGPU_store_output: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000608 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
609 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000610 MFI->LiveOuts.push_back(Reg);
Andrew Trickef9de2a2013-05-25 02:42:55 +0000611 return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
Tom Stellard75aadc22012-12-11 21:25:42 +0000612 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000613 case AMDGPUIntrinsic::R600_store_swizzle: {
614 const SDValue Args[8] = {
615 Chain,
616 Op.getOperand(2), // Export Value
617 Op.getOperand(3), // ArrayBase
618 Op.getOperand(4), // Type
619 DAG.getConstant(0, MVT::i32), // SWZ_X
620 DAG.getConstant(1, MVT::i32), // SWZ_Y
621 DAG.getConstant(2, MVT::i32), // SWZ_Z
622 DAG.getConstant(3, MVT::i32) // SWZ_W
623 };
Craig Topper48d114b2014-04-26 18:35:24 +0000624 return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(), Args);
Tom Stellard75aadc22012-12-11 21:25:42 +0000625 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000626
Tom Stellard75aadc22012-12-11 21:25:42 +0000627 // default for switch(IntrinsicID)
628 default: break;
629 }
630 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
631 break;
632 }
633 case ISD::INTRINSIC_WO_CHAIN: {
634 unsigned IntrinsicID =
635 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
636 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000637 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000638 switch(IntrinsicID) {
639 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000640 case AMDGPUIntrinsic::R600_load_input: {
641 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
642 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
643 MachineFunction &MF = DAG.getMachineFunction();
644 MachineRegisterInfo &MRI = MF.getRegInfo();
645 MRI.addLiveIn(Reg);
646 return DAG.getCopyFromReg(DAG.getEntryNode(),
647 SDLoc(DAG.getEntryNode()), Reg, VT);
648 }
649
650 case AMDGPUIntrinsic::R600_interp_input: {
651 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
652 int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
653 MachineSDNode *interp;
654 if (ijb < 0) {
Eric Christopher7792e322015-01-30 23:24:40 +0000655 const R600InstrInfo *TII =
656 static_cast<const R600InstrInfo *>(Subtarget->getInstrInfo());
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000657 interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
658 MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
659 return DAG.getTargetExtractSubreg(
660 TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
661 DL, MVT::f32, SDValue(interp, 0));
662 }
663 MachineFunction &MF = DAG.getMachineFunction();
664 MachineRegisterInfo &MRI = MF.getRegInfo();
665 unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
666 unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
667 MRI.addLiveIn(RegisterI);
668 MRI.addLiveIn(RegisterJ);
669 SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
670 SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
671 SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
672 SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);
673
674 if (slot % 4 < 2)
675 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
676 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
677 RegisterJNode, RegisterINode);
678 else
679 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
680 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
681 RegisterJNode, RegisterINode);
682 return SDValue(interp, slot % 2);
683 }
Vincent Lejeunef143af32013-11-11 22:10:24 +0000684 case AMDGPUIntrinsic::R600_interp_xy:
685 case AMDGPUIntrinsic::R600_interp_zw: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000686 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Tom Stellard41afe6a2013-02-05 17:09:14 +0000687 MachineSDNode *interp;
Vincent Lejeunef143af32013-11-11 22:10:24 +0000688 SDValue RegisterINode = Op.getOperand(2);
689 SDValue RegisterJNode = Op.getOperand(3);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000690
Vincent Lejeunef143af32013-11-11 22:10:24 +0000691 if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
Tom Stellard41afe6a2013-02-05 17:09:14 +0000692 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
Vincent Lejeunef143af32013-11-11 22:10:24 +0000693 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000694 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000695 else
696 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
Vincent Lejeunef143af32013-11-11 22:10:24 +0000697 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000698 RegisterJNode, RegisterINode);
Vincent Lejeunef143af32013-11-11 22:10:24 +0000699 return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
700 SDValue(interp, 0), SDValue(interp, 1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000701 }
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000702 case AMDGPUIntrinsic::R600_tex:
703 case AMDGPUIntrinsic::R600_texc:
704 case AMDGPUIntrinsic::R600_txl:
705 case AMDGPUIntrinsic::R600_txlc:
706 case AMDGPUIntrinsic::R600_txb:
707 case AMDGPUIntrinsic::R600_txbc:
708 case AMDGPUIntrinsic::R600_txf:
709 case AMDGPUIntrinsic::R600_txq:
710 case AMDGPUIntrinsic::R600_ddx:
Vincent Lejeune6df39432013-10-02 16:00:33 +0000711 case AMDGPUIntrinsic::R600_ddy:
712 case AMDGPUIntrinsic::R600_ldptr: {
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000713 unsigned TextureOp;
714 switch (IntrinsicID) {
715 case AMDGPUIntrinsic::R600_tex:
716 TextureOp = 0;
717 break;
718 case AMDGPUIntrinsic::R600_texc:
719 TextureOp = 1;
720 break;
721 case AMDGPUIntrinsic::R600_txl:
722 TextureOp = 2;
723 break;
724 case AMDGPUIntrinsic::R600_txlc:
725 TextureOp = 3;
726 break;
727 case AMDGPUIntrinsic::R600_txb:
728 TextureOp = 4;
729 break;
730 case AMDGPUIntrinsic::R600_txbc:
731 TextureOp = 5;
732 break;
733 case AMDGPUIntrinsic::R600_txf:
734 TextureOp = 6;
735 break;
736 case AMDGPUIntrinsic::R600_txq:
737 TextureOp = 7;
738 break;
739 case AMDGPUIntrinsic::R600_ddx:
740 TextureOp = 8;
741 break;
742 case AMDGPUIntrinsic::R600_ddy:
743 TextureOp = 9;
744 break;
Vincent Lejeune6df39432013-10-02 16:00:33 +0000745 case AMDGPUIntrinsic::R600_ldptr:
746 TextureOp = 10;
747 break;
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000748 default:
749 llvm_unreachable("Unknow Texture Operation");
750 }
751
752 SDValue TexArgs[19] = {
753 DAG.getConstant(TextureOp, MVT::i32),
754 Op.getOperand(1),
755 DAG.getConstant(0, MVT::i32),
756 DAG.getConstant(1, MVT::i32),
757 DAG.getConstant(2, MVT::i32),
758 DAG.getConstant(3, MVT::i32),
759 Op.getOperand(2),
760 Op.getOperand(3),
761 Op.getOperand(4),
762 DAG.getConstant(0, MVT::i32),
763 DAG.getConstant(1, MVT::i32),
764 DAG.getConstant(2, MVT::i32),
765 DAG.getConstant(3, MVT::i32),
766 Op.getOperand(5),
767 Op.getOperand(6),
768 Op.getOperand(7),
769 Op.getOperand(8),
770 Op.getOperand(9),
771 Op.getOperand(10)
772 };
Craig Topper48d114b2014-04-26 18:35:24 +0000773 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000774 }
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000775 case AMDGPUIntrinsic::AMDGPU_dp4: {
776 SDValue Args[8] = {
777 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
778 DAG.getConstant(0, MVT::i32)),
779 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
780 DAG.getConstant(0, MVT::i32)),
781 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
782 DAG.getConstant(1, MVT::i32)),
783 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
784 DAG.getConstant(1, MVT::i32)),
785 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
786 DAG.getConstant(2, MVT::i32)),
787 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
788 DAG.getConstant(2, MVT::i32)),
789 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
790 DAG.getConstant(3, MVT::i32)),
791 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
792 DAG.getConstant(3, MVT::i32))
793 };
Craig Topper48d114b2014-04-26 18:35:24 +0000794 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000795 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000796
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000797 case Intrinsic::r600_read_ngroups_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000798 return LowerImplicitParameter(DAG, VT, DL, 0);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000799 case Intrinsic::r600_read_ngroups_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000800 return LowerImplicitParameter(DAG, VT, DL, 1);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000801 case Intrinsic::r600_read_ngroups_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000802 return LowerImplicitParameter(DAG, VT, DL, 2);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000803 case Intrinsic::r600_read_global_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000804 return LowerImplicitParameter(DAG, VT, DL, 3);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000805 case Intrinsic::r600_read_global_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000806 return LowerImplicitParameter(DAG, VT, DL, 4);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000807 case Intrinsic::r600_read_global_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000808 return LowerImplicitParameter(DAG, VT, DL, 5);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000809 case Intrinsic::r600_read_local_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000810 return LowerImplicitParameter(DAG, VT, DL, 6);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000811 case Intrinsic::r600_read_local_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000812 return LowerImplicitParameter(DAG, VT, DL, 7);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000813 case Intrinsic::r600_read_local_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000814 return LowerImplicitParameter(DAG, VT, DL, 8);
815
Jan Veselye5121f32014-10-14 20:05:26 +0000816 case Intrinsic::AMDGPU_read_workdim:
817 return LowerImplicitParameter(DAG, VT, DL, MFI->ABIArgOffset / 4);
818
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000819 case Intrinsic::r600_read_tgid_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000820 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
821 AMDGPU::T1_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000822 case Intrinsic::r600_read_tgid_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000823 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
824 AMDGPU::T1_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000825 case Intrinsic::r600_read_tgid_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000826 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
827 AMDGPU::T1_Z, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000828 case Intrinsic::r600_read_tidig_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000829 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
830 AMDGPU::T0_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000831 case Intrinsic::r600_read_tidig_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000832 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
833 AMDGPU::T0_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000834 case Intrinsic::r600_read_tidig_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000835 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
836 AMDGPU::T0_Z, VT);
Matt Arsenault257d48d2014-06-24 22:13:39 +0000837 case Intrinsic::AMDGPU_rsq:
838 // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
839 return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000840 }
841 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
842 break;
843 }
844 } // end switch(Op.getOpcode())
845 return SDValue();
846}
847
848void R600TargetLowering::ReplaceNodeResults(SDNode *N,
849 SmallVectorImpl<SDValue> &Results,
850 SelectionDAG &DAG) const {
851 switch (N->getOpcode()) {
Matt Arsenaultd125d742014-03-27 17:23:24 +0000852 default:
853 AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
854 return;
Jan Vesely2cb62ce2014-07-10 22:40:21 +0000855 case ISD::FP_TO_UINT:
856 if (N->getValueType(0) == MVT::i1) {
857 Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
858 return;
859 }
860 // Fall-through. Since we don't care about out of bounds values
861 // we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint
862 // considers some extra cases which are not necessary here.
863 case ISD::FP_TO_SINT: {
864 SDValue Result;
865 if (expandFP_TO_SINT(N, Result, DAG))
866 Results.push_back(Result);
Tom Stellard365366f2013-01-23 02:09:06 +0000867 return;
Jan Vesely2cb62ce2014-07-10 22:40:21 +0000868 }
Jan Vesely343cd6f02014-06-22 21:43:01 +0000869 case ISD::UDIV: {
870 SDValue Op = SDValue(N, 0);
871 SDLoc DL(Op);
872 EVT VT = Op.getValueType();
873 SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT),
874 N->getOperand(0), N->getOperand(1));
875 Results.push_back(UDIVREM);
876 break;
877 }
878 case ISD::UREM: {
879 SDValue Op = SDValue(N, 0);
880 SDLoc DL(Op);
881 EVT VT = Op.getValueType();
882 SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT),
883 N->getOperand(0), N->getOperand(1));
884 Results.push_back(UDIVREM.getValue(1));
885 break;
886 }
887 case ISD::SDIV: {
888 SDValue Op = SDValue(N, 0);
889 SDLoc DL(Op);
890 EVT VT = Op.getValueType();
891 SDValue SDIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(VT, VT),
892 N->getOperand(0), N->getOperand(1));
893 Results.push_back(SDIVREM);
894 break;
895 }
896 case ISD::SREM: {
897 SDValue Op = SDValue(N, 0);
898 SDLoc DL(Op);
899 EVT VT = Op.getValueType();
900 SDValue SDIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(VT, VT),
901 N->getOperand(0), N->getOperand(1));
902 Results.push_back(SDIVREM.getValue(1));
903 break;
904 }
905 case ISD::SDIVREM: {
906 SDValue Op = SDValue(N, 1);
907 SDValue RES = LowerSDIVREM(Op, DAG);
908 Results.push_back(RES);
909 Results.push_back(RES.getValue(1));
910 break;
911 }
912 case ISD::UDIVREM: {
913 SDValue Op = SDValue(N, 0);
Tom Stellardbf69d762014-11-15 01:07:53 +0000914 LowerUDIVREM64(Op, DAG, Results);
Jan Vesely343cd6f02014-06-22 21:43:01 +0000915 break;
916 }
917 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000918}
919
Tom Stellard880a80a2014-06-17 16:53:14 +0000920SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
921 SDValue Vector) const {
922
923 SDLoc DL(Vector);
924 EVT VecVT = Vector.getValueType();
925 EVT EltVT = VecVT.getVectorElementType();
926 SmallVector<SDValue, 8> Args;
927
928 for (unsigned i = 0, e = VecVT.getVectorNumElements();
929 i != e; ++i) {
930 Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
931 Vector, DAG.getConstant(i, getVectorIdxTy())));
932 }
933
934 return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
935}
936
937SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
938 SelectionDAG &DAG) const {
939
940 SDLoc DL(Op);
941 SDValue Vector = Op.getOperand(0);
942 SDValue Index = Op.getOperand(1);
943
944 if (isa<ConstantSDNode>(Index) ||
945 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
946 return Op;
947
948 Vector = vectorToVerticalVector(DAG, Vector);
949 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
950 Vector, Index);
951}
952
953SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
954 SelectionDAG &DAG) const {
955 SDLoc DL(Op);
956 SDValue Vector = Op.getOperand(0);
957 SDValue Value = Op.getOperand(1);
958 SDValue Index = Op.getOperand(2);
959
960 if (isa<ConstantSDNode>(Index) ||
961 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
962 return Op;
963
964 Vector = vectorToVerticalVector(DAG, Vector);
965 SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
966 Vector, Value, Index);
967 return vectorToVerticalVector(DAG, Insert);
968}
969
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000970SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
971 // On hw >= R700, COS/SIN input must be between -1. and 1.
972 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
973 EVT VT = Op.getValueType();
974 SDValue Arg = Op.getOperand(0);
975 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
976 DAG.getNode(ISD::FADD, SDLoc(Op), VT,
977 DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
978 DAG.getConstantFP(0.15915494309, MVT::f32)),
979 DAG.getConstantFP(0.5, MVT::f32)));
980 unsigned TrigNode;
981 switch (Op.getOpcode()) {
982 case ISD::FCOS:
983 TrigNode = AMDGPUISD::COS_HW;
984 break;
985 case ISD::FSIN:
986 TrigNode = AMDGPUISD::SIN_HW;
987 break;
988 default:
989 llvm_unreachable("Wrong trig opcode");
990 }
991 SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
992 DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
993 DAG.getConstantFP(-0.5, MVT::f32)));
994 if (Gen >= AMDGPUSubtarget::R700)
995 return TrigVal;
996 // On R600 hw, COS/SIN input must be between -Pi and Pi.
997 return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
998 DAG.getConstantFP(3.14159265359, MVT::f32));
999}
1000
Jan Vesely25f36272014-06-18 12:27:13 +00001001SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
1002 SDLoc DL(Op);
1003 EVT VT = Op.getValueType();
1004
1005 SDValue Lo = Op.getOperand(0);
1006 SDValue Hi = Op.getOperand(1);
1007 SDValue Shift = Op.getOperand(2);
1008 SDValue Zero = DAG.getConstant(0, VT);
1009 SDValue One = DAG.getConstant(1, VT);
1010
1011 SDValue Width = DAG.getConstant(VT.getSizeInBits(), VT);
1012 SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT);
1013 SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
1014 SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
1015
1016 // The dance around Width1 is necessary for 0 special case.
1017 // Without it the CompShift might be 32, producing incorrect results in
1018 // Overflow. So we do the shift in two steps, the alternative is to
1019 // add a conditional to filter the special case.
1020
1021 SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
1022 Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);
1023
1024 SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
1025 HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
1026 SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);
1027
1028 SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
1029 SDValue LoBig = Zero;
1030
1031 Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
1032 Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
1033
1034 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
1035}
1036
Jan Vesely900ff2e2014-06-18 12:27:15 +00001037SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
1038 SDLoc DL(Op);
1039 EVT VT = Op.getValueType();
1040
1041 SDValue Lo = Op.getOperand(0);
1042 SDValue Hi = Op.getOperand(1);
1043 SDValue Shift = Op.getOperand(2);
1044 SDValue Zero = DAG.getConstant(0, VT);
1045 SDValue One = DAG.getConstant(1, VT);
1046
Jan Veselyecf51332014-06-18 12:27:17 +00001047 const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;
1048
Jan Vesely900ff2e2014-06-18 12:27:15 +00001049 SDValue Width = DAG.getConstant(VT.getSizeInBits(), VT);
1050 SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT);
1051 SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
1052 SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
1053
1054 // The dance around Width1 is necessary for 0 special case.
1055 // Without it the CompShift might be 32, producing incorrect results in
1056 // Overflow. So we do the shift in two steps, the alternative is to
1057 // add a conditional to filter the special case.
1058
1059 SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
1060 Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);
1061
Jan Veselyecf51332014-06-18 12:27:17 +00001062 SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
Jan Vesely900ff2e2014-06-18 12:27:15 +00001063 SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
1064 LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);
1065
Jan Veselyecf51332014-06-18 12:27:17 +00001066 SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
1067 SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;
Jan Vesely900ff2e2014-06-18 12:27:15 +00001068
1069 Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
1070 Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
1071
1072 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
1073}
1074
Tom Stellard75aadc22012-12-11 21:25:42 +00001075SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
1076 return DAG.getNode(
1077 ISD::SETCC,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001078 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +00001079 MVT::i1,
1080 Op, DAG.getConstantFP(0.0f, MVT::f32),
1081 DAG.getCondCode(ISD::SETNE)
1082 );
1083}
1084
Tom Stellard75aadc22012-12-11 21:25:42 +00001085SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001086 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001087 unsigned DwordOffset) const {
1088 unsigned ByteOffset = DwordOffset * 4;
1089 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +00001090 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +00001091
1092 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
1093 assert(isInt<16>(ByteOffset));
1094
1095 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
1096 DAG.getConstant(ByteOffset, MVT::i32), // PTR
1097 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
1098 false, false, false, 0);
1099}
1100
Tom Stellard75aadc22012-12-11 21:25:42 +00001101bool R600TargetLowering::isZero(SDValue Op) const {
1102 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
1103 return Cst->isNullValue();
1104 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
1105 return CstFP->isZero();
1106 } else {
1107 return false;
1108 }
1109}
1110
/// Lower SELECT_CC into a form the R600 instruction set can match natively:
/// either a SET* instruction (comparison producing a hardware true/false
/// value), a CND* instruction (conditional move against zero), or — when
/// neither pattern applies — a pair of supported SELECT_CC nodes.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // First see if the whole select collapses to a legacy fmin/fmax pattern.
  if (VT == MVT::f32) {
    DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
    SDValue MinMax = CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
    if (MinMax)
      return MinMax;
  }

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.  If the values
  // are reversed (HW-true in the False slot), invert — and if the inverted
  // condition code is not legal, additionally swap the compare operands.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
      ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        // Inverting the condition also swaps the select arms.
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types.  This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    // CND* has no "not equal" form, so lower NE-style conditions as the
    // inverted condition with the select arms exchanged.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
1252
Alp Tokercb402912014-01-24 17:20:08 +00001253/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001254/// convert these pointers to a register index. Each register holds
1255/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
1256/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
1257/// for indirect addressing.
1258SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1259 unsigned StackWidth,
1260 SelectionDAG &DAG) const {
1261 unsigned SRLPad;
1262 switch(StackWidth) {
1263 case 1:
1264 SRLPad = 2;
1265 break;
1266 case 2:
1267 SRLPad = 3;
1268 break;
1269 case 4:
1270 SRLPad = 4;
1271 break;
1272 default: llvm_unreachable("Invalid stack width");
1273 }
1274
Andrew Trickef9de2a2013-05-25 02:42:55 +00001275 return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001276 DAG.getConstant(SRLPad, MVT::i32));
1277}
1278
1279void R600TargetLowering::getStackAddress(unsigned StackWidth,
1280 unsigned ElemIdx,
1281 unsigned &Channel,
1282 unsigned &PtrIncr) const {
1283 switch (StackWidth) {
1284 default:
1285 case 1:
1286 Channel = 0;
1287 if (ElemIdx > 0) {
1288 PtrIncr = 1;
1289 } else {
1290 PtrIncr = 0;
1291 }
1292 break;
1293 case 2:
1294 Channel = ElemIdx % 2;
1295 if (ElemIdx == 2) {
1296 PtrIncr = 1;
1297 } else {
1298 PtrIncr = 0;
1299 }
1300 break;
1301 case 4:
1302 Channel = ElemIdx;
1303 PtrIncr = 0;
1304 break;
1305 }
1306}
1307
/// Custom-lower a store node.  Global-address-space stores are either
/// expanded to a masked-OR intrinsic (sub-dword truncating stores) or
/// rewritten to use dword addressing; private-address-space stores are
/// lowered to REGISTER_STORE nodes using indirect register addressing.
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Ptr = Op.getOperand(2);

  // Give the generic AMDGPU lowering the first chance to handle this store.
  SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
  if (Result.getNode()) {
    return Result;
  }

  if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
    if (StoreNode->isTruncatingStore()) {
      // Sub-dword global stores are emulated with a masked-OR on the
      // containing dword: shift the truncated value and its mask to the
      // byte position within the dword.
      EVT VT = Value.getValueType();
      assert(VT.bitsLE(MVT::i32));
      EVT MemVT = StoreNode->getMemoryVT();
      SDValue MaskConstant;
      if (MemVT == MVT::i8) {
        MaskConstant = DAG.getConstant(0xFF, MVT::i32);
      } else {
        assert(MemVT == MVT::i16);
        MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
      }
      // Split the byte address into a dword address and a byte-within-dword
      // index (bit offset = byte index * 8).
      SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
                                      DAG.getConstant(2, MVT::i32));
      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
                                      DAG.getConstant(0x00000003, VT));
      SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
      SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
                                  DAG.getConstant(3, VT));
      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
      // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
      // vector instead.
      SDValue Src[4] = {
        ShiftedValue,
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(0, MVT::i32),
        Mask
      };
      SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src);
      SDValue Args[3] = { Chain, Input, DWordAddr };
      return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
                                     Op->getVTList(), Args, MemVT,
                                     StoreNode->getMemOperand());
    } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
               Value.getValueType().bitsGE(MVT::i32)) {
      // Convert pointer from byte address to dword address.
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
                        DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
                                    Ptr, DAG.getConstant(2, MVT::i32)));

      if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
        llvm_unreachable("Truncated and indexed stores not supported yet");
      } else {
        Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
      }
      return Chain;
    }
  }

  EVT ValueVT = Value.getValueType();

  // Everything below handles only private (stack) stores.
  if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
  if (Ret.getNode()) {
    return Ret;
  }
  // Lowering for indirect addressing

  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL =
      static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  // Convert the byte-addressed pointer into a register index.
  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (ValueVT.isVector()) {
    // Store each vector element to its own register channel/row, then tie
    // the individual REGISTER_STOREs together with a TokenFactor.
    unsigned NumElemVT = ValueVT.getVectorNumElements();
    EVT ElemVT = ValueVT.getVectorElementType();
    SmallVector<SDValue, 4> Stores(NumElemVT);

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      // PtrIncr advances the register pointer when the element crosses into
      // the next register row.
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
                                 Value, DAG.getConstant(i, MVT::i32));

      Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
                              Chain, Elem, Ptr,
                              DAG.getTargetConstant(Channel, MVT::i32));
    }
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
  } else {
    // Scalar store: i8 values are widened to i32 before being stored.
    if (ValueVT == MVT::i8) {
      Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
    }
    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
                        DAG.getTargetConstant(0, MVT::i32)); // Channel
  }

  return Chain;
}
1420
Tom Stellard365366f2013-01-23 02:09:06 +00001421// return (512 + (kc_bank << 12)
1422static int
1423ConstantAddressBlock(unsigned AddressSpace) {
1424 switch (AddressSpace) {
1425 case AMDGPUAS::CONSTANT_BUFFER_0:
1426 return 512;
1427 case AMDGPUAS::CONSTANT_BUFFER_1:
1428 return 512 + 4096;
1429 case AMDGPUAS::CONSTANT_BUFFER_2:
1430 return 512 + 4096 * 2;
1431 case AMDGPUAS::CONSTANT_BUFFER_3:
1432 return 512 + 4096 * 3;
1433 case AMDGPUAS::CONSTANT_BUFFER_4:
1434 return 512 + 4096 * 4;
1435 case AMDGPUAS::CONSTANT_BUFFER_5:
1436 return 512 + 4096 * 5;
1437 case AMDGPUAS::CONSTANT_BUFFER_6:
1438 return 512 + 4096 * 6;
1439 case AMDGPUAS::CONSTANT_BUFFER_7:
1440 return 512 + 4096 * 7;
1441 case AMDGPUAS::CONSTANT_BUFFER_8:
1442 return 512 + 4096 * 8;
1443 case AMDGPUAS::CONSTANT_BUFFER_9:
1444 return 512 + 4096 * 9;
1445 case AMDGPUAS::CONSTANT_BUFFER_10:
1446 return 512 + 4096 * 10;
1447 case AMDGPUAS::CONSTANT_BUFFER_11:
1448 return 512 + 4096 * 11;
1449 case AMDGPUAS::CONSTANT_BUFFER_12:
1450 return 512 + 4096 * 12;
1451 case AMDGPUAS::CONSTANT_BUFFER_13:
1452 return 512 + 4096 * 13;
1453 case AMDGPUAS::CONSTANT_BUFFER_14:
1454 return 512 + 4096 * 14;
1455 case AMDGPUAS::CONSTANT_BUFFER_15:
1456 return 512 + 4096 * 15;
1457 default:
1458 return -1;
1459 }
1460}
1461
/// Custom lowering for ISD::LOAD on R600. Tries, in order:
///  1. the generic AMDGPU load lowering,
///  2. constant-address-space loads of global variables (REGISTER_LOAD),
///  3. scalarization of vector loads from local memory,
///  4. kcache constant-buffer loads (CONST_ADDRESS),
///  5. manual expansion of SEXT loads,
///  6. indirect-addressing expansion for private-address-space loads.
/// Returns SDValue() for address spaces handled elsewhere.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  // First give the target-independent AMDGPU lowering a chance; if it
  // produced a node, merge it with the original chain and return.
  SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
  if (Ret.getNode()) {
    SDValue Ops[2] = {
      Ret,
      Chain
    };
    return DAG.getMergeValues(Ops, DL);
  }

  // Lower loads constant address space global variable loads
  if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
      isa<GlobalVariable>(
          GetUnderlyingObject(LoadNode->getMemOperand()->getValue()))) {

    // Reinterpret the pointer in the private address space and convert the
    // byte offset to a dword (4-byte) register index via SRL by 2.
    SDValue Ptr = DAG.getZExtOrTrunc(LoadNode->getBasePtr(), DL,
        getPointerTy(AMDGPUAS::PRIVATE_ADDRESS));
    Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
        DAG.getConstant(2, MVT::i32));
    return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op->getVTList(),
                       LoadNode->getChain(), Ptr,
                       DAG.getTargetConstant(0, MVT::i32), Op.getOperand(2));
  }

  // Vector loads from local memory are split into scalar loads.
  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      ScalarizeVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Constant-buffer (kcache) loads. Only non-extending and zero-extending
  // loads can use the CONST_ADDRESS path.
  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
        isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      // Build a vector out of as many of the four slots as the result needs.
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT,
                           makeArrayRef(Slots, NumElements));
    } else {
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    // A scalar result is taken from element 0 of the loaded vector.
    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    // Expand as an extending load followed by shl/sra to sign extend.
    SDValue ShiftAmount =
          DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                  LoadNode->getPointerInfo(), MemVT,
                                  LoadNode->isVolatile(),
                                  LoadNode->isNonTemporal(),
                                  LoadNode->isInvariant(),
                                  LoadNode->getAlignment());
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);

    SDValue MergedValues[2] = { Sra, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Anything left that is not in the private address space is handled by
  // generic legalization.
  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL =
      static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    // Emit one REGISTER_LOAD per element, stepping the pointer according to
    // the stack layout chosen by getStackAddress.
    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, MVT::i32),
                             Op.getOperand(2));
    }
    // Pad unused lanes with UNDEF up to the 4-wide register.
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2] = {
    LoweredLoad,
    Chain
  };

  return DAG.getMergeValues(Ops, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001624
Matt Arsenault1d555c42014-06-23 18:00:55 +00001625SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1626 SDValue Chain = Op.getOperand(0);
1627 SDValue Cond = Op.getOperand(1);
1628 SDValue Jump = Op.getOperand(2);
1629
1630 return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1631 Chain, Jump, Cond);
1632}
1633
Tom Stellard75aadc22012-12-11 21:25:42 +00001634/// XXX Only kernel functions are supported, so we can assume for now that
1635/// every function is a kernel function, but in the future we should use
1636/// separate calling conventions for kernel and non-kernel functions.
1637SDValue R600TargetLowering::LowerFormalArguments(
1638 SDValue Chain,
1639 CallingConv::ID CallConv,
1640 bool isVarArg,
1641 const SmallVectorImpl<ISD::InputArg> &Ins,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001642 SDLoc DL, SelectionDAG &DAG,
Tom Stellard75aadc22012-12-11 21:25:42 +00001643 SmallVectorImpl<SDValue> &InVals) const {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001644 SmallVector<CCValAssign, 16> ArgLocs;
Eric Christopherb5217502014-08-06 18:45:26 +00001645 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1646 *DAG.getContext());
Vincent Lejeunef143af32013-11-11 22:10:24 +00001647 MachineFunction &MF = DAG.getMachineFunction();
Jan Veselye5121f32014-10-14 20:05:26 +00001648 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellardacfeebf2013-07-23 01:48:05 +00001649
Tom Stellardaf775432013-10-23 00:44:32 +00001650 SmallVector<ISD::InputArg, 8> LocalIns;
1651
Matt Arsenault209a7b92014-04-18 07:40:20 +00001652 getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);
Tom Stellardaf775432013-10-23 00:44:32 +00001653
1654 AnalyzeFormalArguments(CCInfo, LocalIns);
Tom Stellardacfeebf2013-07-23 01:48:05 +00001655
Tom Stellard1e803092013-07-23 01:48:18 +00001656 for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001657 CCValAssign &VA = ArgLocs[i];
Matt Arsenault74ef2772014-08-13 18:14:11 +00001658 const ISD::InputArg &In = Ins[i];
1659 EVT VT = In.VT;
1660 EVT MemVT = VA.getLocVT();
1661 if (!VT.isVector() && MemVT.isVector()) {
1662 // Get load source type if scalarized.
1663 MemVT = MemVT.getVectorElementType();
1664 }
Tom Stellard78e01292013-07-23 01:47:58 +00001665
Jan Veselye5121f32014-10-14 20:05:26 +00001666 if (MFI->getShaderType() != ShaderType::COMPUTE) {
Vincent Lejeunef143af32013-11-11 22:10:24 +00001667 unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
1668 SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
1669 InVals.push_back(Register);
1670 continue;
1671 }
1672
Tom Stellard75aadc22012-12-11 21:25:42 +00001673 PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Matt Arsenault74ef2772014-08-13 18:14:11 +00001674 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellardacfeebf2013-07-23 01:48:05 +00001675
Matt Arsenaultfae02982014-03-17 18:58:11 +00001676 // i64 isn't a legal type, so the register type used ends up as i32, which
1677 // isn't expected here. It attempts to create this sextload, but it ends up
1678 // being invalid. Somehow this seems to work with i64 arguments, but breaks
1679 // for <1 x i64>.
1680
Tom Stellardacfeebf2013-07-23 01:48:05 +00001681 // The first 36 bytes of the input buffer contains information about
1682 // thread group and global sizes.
Matt Arsenault74ef2772014-08-13 18:14:11 +00001683 ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
1684 if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
1685 // FIXME: This should really check the extload type, but the handling of
1686 // extload vector parameters seems to be broken.
Matt Arsenaulte1f030c2014-04-11 20:59:54 +00001687
Matt Arsenault74ef2772014-08-13 18:14:11 +00001688 // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
1689 Ext = ISD::SEXTLOAD;
1690 }
1691
1692 // Compute the offset from the value.
1693 // XXX - I think PartOffset should give you this, but it seems to give the
1694 // size of the register which isn't useful.
1695
Andrew Trick05938a52015-02-16 18:10:47 +00001696 unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
Matt Arsenault74ef2772014-08-13 18:14:11 +00001697 unsigned PartOffset = VA.getLocMemOffset();
Jan Veselye5121f32014-10-14 20:05:26 +00001698 unsigned Offset = 36 + VA.getLocMemOffset();
Matt Arsenault74ef2772014-08-13 18:14:11 +00001699
1700 MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
1701 SDValue Arg = DAG.getLoad(ISD::UNINDEXED, Ext, VT, DL, Chain,
Jan Veselye5121f32014-10-14 20:05:26 +00001702 DAG.getConstant(Offset, MVT::i32),
Matt Arsenault74ef2772014-08-13 18:14:11 +00001703 DAG.getUNDEF(MVT::i32),
1704 PtrInfo,
1705 MemVT, false, true, true, 4);
Matt Arsenault209a7b92014-04-18 07:40:20 +00001706
1707 // 4 is the preferred alignment for the CONSTANT memory space.
Tom Stellard75aadc22012-12-11 21:25:42 +00001708 InVals.push_back(Arg);
Jan Veselye5121f32014-10-14 20:05:26 +00001709 MFI->ABIArgOffset = Offset + MemVT.getStoreSize();
Tom Stellard75aadc22012-12-11 21:25:42 +00001710 }
1711 return Chain;
1712}
1713
Matt Arsenault758659232013-05-18 00:21:46 +00001714EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001715 if (!VT.isVector())
1716 return MVT::i32;
Tom Stellard75aadc22012-12-11 21:25:42 +00001717 return VT.changeVectorElementTypeToInteger();
1718}
1719
Matt Arsenault209a7b92014-04-18 07:40:20 +00001720static SDValue CompactSwizzlableVector(
1721 SelectionDAG &DAG, SDValue VectorEntry,
1722 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001723 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1724 assert(RemapSwizzle.empty());
1725 SDValue NewBldVec[4] = {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001726 VectorEntry.getOperand(0),
1727 VectorEntry.getOperand(1),
1728 VectorEntry.getOperand(2),
1729 VectorEntry.getOperand(3)
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001730 };
1731
1732 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeunefa58a5f2013-10-13 17:56:10 +00001733 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1734 // We mask write here to teach later passes that the ith element of this
1735 // vector is undef. Thus we can use it to reduce 128 bits reg usage,
1736 // break false dependencies and additionnaly make assembly easier to read.
1737 RemapSwizzle[i] = 7; // SEL_MASK_WRITE
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001738 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1739 if (C->isZero()) {
1740 RemapSwizzle[i] = 4; // SEL_0
1741 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1742 } else if (C->isExactlyValue(1.0)) {
1743 RemapSwizzle[i] = 5; // SEL_1
1744 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1745 }
1746 }
1747
1748 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1749 continue;
1750 for (unsigned j = 0; j < i; j++) {
1751 if (NewBldVec[i] == NewBldVec[j]) {
1752 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1753 RemapSwizzle[i] = j;
1754 break;
1755 }
1756 }
1757 }
1758
1759 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001760 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001761}
1762
Benjamin Kramer193960c2013-06-11 13:32:25 +00001763static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1764 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001765 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1766 assert(RemapSwizzle.empty());
1767 SDValue NewBldVec[4] = {
1768 VectorEntry.getOperand(0),
1769 VectorEntry.getOperand(1),
1770 VectorEntry.getOperand(2),
1771 VectorEntry.getOperand(3)
1772 };
1773 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001774 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001775 RemapSwizzle[i] = i;
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001776 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1777 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1778 ->getZExtValue();
1779 if (i == Idx)
1780 isUnmovable[Idx] = true;
1781 }
1782 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001783
1784 for (unsigned i = 0; i < 4; i++) {
1785 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1786 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1787 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001788 if (isUnmovable[Idx])
1789 continue;
1790 // Swap i and Idx
1791 std::swap(NewBldVec[Idx], NewBldVec[i]);
1792 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1793 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001794 }
1795 }
1796
1797 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001798 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001799}
1800
1801
1802SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1803SDValue Swz[4], SelectionDAG &DAG) const {
1804 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1805 // Old -> New swizzle values
1806 DenseMap<unsigned, unsigned> SwizzleRemap;
1807
1808 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1809 for (unsigned i = 0; i < 4; i++) {
1810 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1811 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1812 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1813 }
1814
1815 SwizzleRemap.clear();
1816 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1817 for (unsigned i = 0; i < 4; i++) {
1818 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1819 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1820 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1821 }
1822
1823 return BuildVector;
1824}
1825
1826
Tom Stellard75aadc22012-12-11 21:25:42 +00001827//===----------------------------------------------------------------------===//
1828// Custom DAG Optimizations
1829//===----------------------------------------------------------------------===//
1830
/// Target-specific DAG combines for R600. Any opcode not handled here is
/// forwarded to the generic AMDGPU combiner.
SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
  case ISD::FP_ROUND: {
      SDValue Arg = N->getOperand(0);
      if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
        return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
                           Arg.getOperand(0));
      }
      break;
    }

  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
  // (i32 select_cc f32, f32, -1, 0 cc)
  //
  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
  // this to one of the SET*_DX10 instructions.
  case ISD::FP_TO_SINT: {
    SDValue FNeg = N->getOperand(0);
    if (FNeg.getOpcode() != ISD::FNEG) {
      return SDValue();
    }
    SDValue SelectCC = FNeg.getOperand(0);
    // The select_cc must compare f32s and select the hardware 1.0/0.0
    // true/false values for the DX10 form to apply.
    if (SelectCC.getOpcode() != ISD::SELECT_CC ||
        SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
        SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
        !isHWTrueValue(SelectCC.getOperand(2)) ||
        !isHWFalseValue(SelectCC.getOperand(3))) {
      return SDValue();
    }

    return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
                           SelectCC.getOperand(0), // LHS
                           SelectCC.getOperand(1), // RHS
                           DAG.getConstant(-1, MVT::i32), // True
                           DAG.getConstant(0, MVT::i32), // Flase
                           SelectCC.getOperand(4)); // CC

    // NOTE(review): this break is unreachable — the return above always
    // executes on this path.
    break;
  }

  // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
  // => build_vector elt0, ... , NewEltIdx, ... , eltN
  case ISD::INSERT_VECTOR_ELT: {
    SDValue InVec = N->getOperand(0);
    SDValue InVal = N->getOperand(1);
    SDValue EltNo = N->getOperand(2);
    SDLoc dl(N);

    // If the inserted element is an UNDEF, just use the input vector.
    if (InVal.getOpcode() == ISD::UNDEF)
      return InVec;

    EVT VT = InVec.getValueType();

    // If we can't generate a legal BUILD_VECTOR, exit
    if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
      return SDValue();

    // Check that we know which element is being inserted
    if (!isa<ConstantSDNode>(EltNo))
      return SDValue();
    unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

    // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
    // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
    // vector elements.
    SmallVector<SDValue, 8> Ops;
    if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
      Ops.append(InVec.getNode()->op_begin(),
                 InVec.getNode()->op_end());
    } else if (InVec.getOpcode() == ISD::UNDEF) {
      unsigned NElts = VT.getVectorNumElements();
      Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
    } else {
      return SDValue();
    }

    // Insert the element
    if (Elt < Ops.size()) {
      // All the operands of BUILD_VECTOR must have the same type;
      // we enforce that here.
      EVT OpVT = Ops[0].getValueType();
      if (InVal.getValueType() != OpVT)
        InVal = OpVT.bitsGT(InVal.getValueType()) ?
          DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
          DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
      Ops[Elt] = InVal;
    }

    // Return the new vector
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
  }

  // Extract_vec (Build_vector) generated by custom lowering
  // also needs to be customly combined
  case ISD::EXTRACT_VECTOR_ELT: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return Arg->getOperand(Element);
      }
    }
    if (Arg.getOpcode() == ISD::BITCAST &&
        Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
            Arg->getOperand(0).getOperand(Element));
      }
    }
  }
  // NOTE(review): there is no break above — an EXTRACT_VECTOR_ELT that
  // matches neither pattern falls through into the SELECT_CC case below.
  // It is harmless only because the SELECT_CC code re-dispatches to the
  // generic combiner and then bails out on the opcode check; confirm the
  // fallthrough is intentional.

  case ISD::SELECT_CC: {
    // Try common optimizations
    SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
    if (Ret.getNode())
      return Ret;

    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
    // selectcc x, y, a, b, inv(cc)
    //
    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
    // selectcc x, y, a, b, cc
    SDValue LHS = N->getOperand(0);
    if (LHS.getOpcode() != ISD::SELECT_CC) {
      return SDValue();
    }

    SDValue RHS = N->getOperand(1);
    SDValue True = N->getOperand(2);
    SDValue False = N->getOperand(3);
    ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();

    // The outer select_cc must select the inner one's own true/false values
    // and compare against the inner false value for the folds to hold.
    if (LHS.getOperand(2).getNode() != True.getNode() ||
        LHS.getOperand(3).getNode() != False.getNode() ||
        RHS.getNode() != False.getNode()) {
      return SDValue();
    }

    switch (NCC) {
    default: return SDValue();
    case ISD::SETNE: return LHS;
    case ISD::SETEQ: {
      ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
      LHSCC = ISD::getSetCCInverse(LHSCC,
                                  LHS.getOperand(0).getValueType().isInteger());
      // Only rebuild with the inverted condition if it is (or will be) legal.
      if (DCI.isBeforeLegalizeOps() ||
          isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
        return DAG.getSelectCC(SDLoc(N),
                               LHS.getOperand(0),
                               LHS.getOperand(1),
                               LHS.getOperand(2),
                               LHS.getOperand(3),
                               LHSCC);
      break;
    }
    }
    return SDValue();
  }

  case AMDGPUISD::EXPORT: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    // Rebuild the export with an optimized source vector and swizzles.
    SDValue NewArgs[8] = {
      N->getOperand(0), // Chain
      SDValue(),
      N->getOperand(2), // ArrayBase
      N->getOperand(3), // Type
      N->getOperand(4), // SWZ_X
      N->getOperand(5), // SWZ_Y
      N->getOperand(6), // SWZ_Z
      N->getOperand(7) // SWZ_W
    };
    SDLoc DL(N);
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
    return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
  }
  case AMDGPUISD::TEXTURE_FETCH: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    // Same idea as EXPORT: optimize the coordinate vector and the swizzle
    // operands (indices 2..5) in place.
    SDValue NewArgs[19] = {
      N->getOperand(0),
      N->getOperand(1),
      N->getOperand(2),
      N->getOperand(3),
      N->getOperand(4),
      N->getOperand(5),
      N->getOperand(6),
      N->getOperand(7),
      N->getOperand(8),
      N->getOperand(9),
      N->getOperand(10),
      N->getOperand(11),
      N->getOperand(12),
      N->getOperand(13),
      N->getOperand(14),
      N->getOperand(15),
      N->getOperand(16),
      N->getOperand(17),
      N->getOperand(18),
    };
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
    return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
        NewArgs);
  }
  }

  return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002050
2051static bool
2052FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002053 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Eric Christopherfc6de422014-08-05 02:39:49 +00002054 const R600InstrInfo *TII =
2055 static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002056 if (!Src.isMachineOpcode())
2057 return false;
2058 switch (Src.getMachineOpcode()) {
2059 case AMDGPU::FNEG_R600:
2060 if (!Neg.getNode())
2061 return false;
2062 Src = Src.getOperand(0);
2063 Neg = DAG.getTargetConstant(1, MVT::i32);
2064 return true;
2065 case AMDGPU::FABS_R600:
2066 if (!Abs.getNode())
2067 return false;
2068 Src = Src.getOperand(0);
2069 Abs = DAG.getTargetConstant(1, MVT::i32);
2070 return true;
2071 case AMDGPU::CONST_COPY: {
2072 unsigned Opcode = ParentNode->getMachineOpcode();
2073 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2074
2075 if (!Sel.getNode())
2076 return false;
2077
2078 SDValue CstOffset = Src.getOperand(0);
2079 if (ParentNode->getValueType(0).isVector())
2080 return false;
2081
2082 // Gather constants values
2083 int SrcIndices[] = {
2084 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2085 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2086 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
2087 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2088 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2089 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2090 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2091 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2092 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2093 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2094 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
2095 };
2096 std::vector<unsigned> Consts;
Matt Arsenault4d64f962014-05-12 19:23:21 +00002097 for (int OtherSrcIdx : SrcIndices) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002098 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2099 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2100 continue;
2101 if (HasDst) {
2102 OtherSrcIdx--;
2103 OtherSelIdx--;
2104 }
2105 if (RegisterSDNode *Reg =
2106 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2107 if (Reg->getReg() == AMDGPU::ALU_CONST) {
Matt Arsenaultb3ee3882014-05-12 19:26:38 +00002108 ConstantSDNode *Cst
2109 = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002110 Consts.push_back(Cst->getZExtValue());
2111 }
2112 }
2113 }
2114
Matt Arsenault37c12d72014-05-12 20:42:57 +00002115 ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002116 Consts.push_back(Cst->getZExtValue());
2117 if (!TII->fitsConstReadLimitations(Consts)) {
2118 return false;
2119 }
2120
2121 Sel = CstOffset;
2122 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
2123 return true;
2124 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002125 case AMDGPU::MOV_IMM_I32:
2126 case AMDGPU::MOV_IMM_F32: {
2127 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
2128 uint64_t ImmValue = 0;
2129
2130
2131 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
2132 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
2133 float FloatValue = FPC->getValueAPF().convertToFloat();
2134 if (FloatValue == 0.0) {
2135 ImmReg = AMDGPU::ZERO;
2136 } else if (FloatValue == 0.5) {
2137 ImmReg = AMDGPU::HALF;
2138 } else if (FloatValue == 1.0) {
2139 ImmReg = AMDGPU::ONE;
2140 } else {
2141 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2142 }
2143 } else {
2144 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
2145 uint64_t Value = C->getZExtValue();
2146 if (Value == 0) {
2147 ImmReg = AMDGPU::ZERO;
2148 } else if (Value == 1) {
2149 ImmReg = AMDGPU::ONE_INT;
2150 } else {
2151 ImmValue = Value;
2152 }
2153 }
2154
2155 // Check that we aren't already using an immediate.
2156 // XXX: It's possible for an instruction to have more than one
2157 // immediate operand, but this is not supported yet.
2158 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
2159 if (!Imm.getNode())
2160 return false;
2161 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
2162 assert(C);
2163 if (C->getZExtValue())
2164 return false;
2165 Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
2166 }
2167 Src = DAG.getRegister(ImmReg, MVT::i32);
2168 return true;
2169 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002170 default:
2171 return false;
2172 }
2173}
2174
2175
/// \brief Fold the instructions after selecting them.
///
/// Walks the just-selected machine node and tries to fold its source
/// operands (immediates, constant reads, clamp) directly into the
/// instruction via FoldOperand, rebuilding the node when a fold succeeds.
///
/// \param Node the machine SDNode produced by instruction selection.
/// \param DAG  the SelectionDAG being lowered.
/// \returns a new machine node with folded operands, or \p Node unchanged
///          if nothing could be folded.
SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
                                            SelectionDAG &DAG) const {
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
  // Only machine opcodes have the named-operand tables queried below.
  if (!Node->isMachineOpcode())
    return Node;
  unsigned Opcode = Node->getMachineOpcode();
  // Placeholder passed to FoldOperand for operand slots a given opcode
  // does not have (e.g. no literal slot for DOT_4's lanes).
  SDValue FakeOp;

  // Mutable copy of the node's operands; FoldOperand edits entries of Ops
  // in place through the SDValue& references taken below.
  std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());

  if (Opcode == AMDGPU::DOT_4) {
    // DOT_4 has 8 source slots: src0/src1 for each of the X/Y/Z/W lanes,
    // each with its own neg and abs modifier operands.
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
    };
    for (unsigned i = 0; i < 8; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      // NOTE(review): the "- 1" converts a MachineInstr operand index to
      // an SDNode operand index — presumably skipping the def operand that
      // SDNode operand lists do not carry; confirm against getOperandIdx.
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue &Abs = Ops[AbsIdx[i] - 1];
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      // getSelIdx returns a MachineInstr index; drop the dst slot to get
      // the SDNode operand position.
      if (HasDst)
        SelIdx--;
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      // FoldOperand mutates Src/Neg/Abs/Sel in place (via Ops); rebuild
      // the node with the updated operand list as soon as one lane folds.
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::REG_SEQUENCE) {
    // REG_SEQUENCE operands alternate (value, subreg-index) starting at 1,
    // so step by 2 over the value slots only.
    for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
      SDValue &Src = Ops[i];
      if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::CLAMP_R600) {
    // Fold the pseudo CLAMP_R600 into its source instruction by setting
    // that instruction's clamp modifier bit, when it has one.
    SDValue Src = Node->getOperand(0);
    if (!Src.isMachineOpcode() ||
        !TII->hasInstrModifiers(Src.getMachineOpcode()))
      return Node;
    int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
        AMDGPU::OpName::clamp);
    if (ClampIdx < 0)
      return Node;
    // Rebuild the *source* node with clamp = 1 (again -1 to translate the
    // MachineInstr operand index to the SDNode operand list).
    std::vector<SDValue> Ops(Src->op_begin(), Src->op_end());
    Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
    return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
        Node->getVTList(), Ops);
  } else {
    // Generic ALU instruction: up to three sources (src0..src2), each with
    // a neg modifier; only src0/src1 have an abs modifier (hence the -1).
    if (!TII->hasInstrModifiers(Opcode))
      return Node;
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
      -1
    };
    for (unsigned i = 0; i < 3; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue FakeAbs;
      SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
      // Translate MachineInstr operand indices to SDNode indices by
      // dropping the dst slot.
      if (HasDst) {
        SelIdx--;
        ImmIdx--;
      }
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      SDValue &Imm = Ops[ImmIdx];
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  }

  // Nothing foldable.
  return Node;
}