blob: b16d53fd015a0e37ecdfef194ca26e706b46bd14 [file] [log] [blame]
//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//

15#include "R600ISelLowering.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000016#include "AMDGPUFrameLowering.h"
Matt Arsenaultc791f392014-06-23 18:00:31 +000017#include "AMDGPUIntrinsicInfo.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000018#include "AMDGPUSubtarget.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000019#include "R600Defines.h"
20#include "R600InstrInfo.h"
21#include "R600MachineFunctionInfo.h"
Tom Stellard067c8152014-07-21 14:01:14 +000022#include "llvm/Analysis/ValueTracking.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000023#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000024#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000025#include "llvm/CodeGen/MachineInstrBuilder.h"
26#include "llvm/CodeGen/MachineRegisterInfo.h"
27#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000028#include "llvm/IR/Argument.h"
29#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000030
31using namespace llvm;
32
33R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000034 AMDGPUTargetLowering(TM),
35 Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000036 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
37 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
38 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
39 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000040 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
41 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
42
Tom Stellard75aadc22012-12-11 21:25:42 +000043 computeRegisterProperties();
44
Tom Stellard0351ea22013-09-28 02:50:50 +000045 // Set condition code actions
46 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
47 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000048 setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000049 setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000050 setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
51 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000052 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
53 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
54 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
55 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000056 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
57 setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
58
59 setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
60 setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
61 setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
62 setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
63
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000064 setOperationAction(ISD::FCOS, MVT::f32, Custom);
65 setOperationAction(ISD::FSIN, MVT::f32, Custom);
66
Tom Stellard75aadc22012-12-11 21:25:42 +000067 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000068 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000069
Tom Stellard492ebea2013-03-08 15:37:07 +000070 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
71 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Matt Arsenault1d555c42014-06-23 18:00:55 +000072 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000073
74 setOperationAction(ISD::FSUB, MVT::f32, Expand);
75
76 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
77 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
78 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000079
Tom Stellard75aadc22012-12-11 21:25:42 +000080 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
81 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
82
Tom Stellarde8f9f282013-03-08 15:37:05 +000083 setOperationAction(ISD::SETCC, MVT::i32, Expand);
84 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000085 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
Jan Vesely2cb62ce2014-07-10 22:40:21 +000086 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
87 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000088
Tom Stellard53f2f902013-09-05 18:38:03 +000089 setOperationAction(ISD::SELECT, MVT::i32, Expand);
90 setOperationAction(ISD::SELECT, MVT::f32, Expand);
91 setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
Tom Stellard53f2f902013-09-05 18:38:03 +000092 setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000093
Matt Arsenault4e466652014-04-16 01:41:30 +000094 // Expand sign extension of vectors
95 if (!Subtarget->hasBFE())
96 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
97
98 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
99 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);
100
101 if (!Subtarget->hasBFE())
102 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
103 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
104 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);
105
106 if (!Subtarget->hasBFE())
107 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
108 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
109 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);
110
111 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
112 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
113 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);
114
115 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
116
117
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000118 // Legalize loads and stores to the private address space.
119 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000120 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000121 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000122
123 // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
124 // spaces, so it is custom lowered to handle those where it isn't.
Tom Stellard1e803092013-07-23 01:48:18 +0000125 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
126 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
127 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
128 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000129 setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
130 setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
131
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000132 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000133 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000134 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000135 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
Tom Stellardd3ee8c12013-08-16 01:12:06 +0000136 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
137 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000138
Tom Stellard365366f2013-01-23 02:09:06 +0000139 setOperationAction(ISD::LOAD, MVT::i32, Custom);
140 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000141 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
142
Tom Stellard880a80a2014-06-17 16:53:14 +0000143 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
144 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
145 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
146 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
147
148 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
149 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
150 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
151 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
152
Tom Stellard75aadc22012-12-11 21:25:42 +0000153 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +0000154 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +0000155 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +0000156 setTargetDAGCombine(ISD::SELECT_CC);
Quentin Colombete2e05482013-07-30 00:27:16 +0000157 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000158
Matt Arsenaultb8b51532014-06-23 18:00:38 +0000159 setOperationAction(ISD::SUB, MVT::i64, Expand);
160
Tom Stellard5f337882014-04-29 23:12:43 +0000161 // These should be replaced by UDVIREM, but it does not happen automatically
162 // during Type Legalization
163 setOperationAction(ISD::UDIV, MVT::i64, Custom);
164 setOperationAction(ISD::UREM, MVT::i64, Custom);
Jan Vesely343cd6f02014-06-22 21:43:01 +0000165 setOperationAction(ISD::SDIV, MVT::i64, Custom);
166 setOperationAction(ISD::SREM, MVT::i64, Custom);
Tom Stellard5f337882014-04-29 23:12:43 +0000167
Jan Vesely25f36272014-06-18 12:27:13 +0000168 // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
169 // to be Legal/Custom in order to avoid library calls.
170 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
Jan Vesely900ff2e2014-06-18 12:27:15 +0000171 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
Jan Veselyecf51332014-06-18 12:27:17 +0000172 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
Jan Vesely25f36272014-06-18 12:27:13 +0000173
Michel Danzer49812b52013-07-10 16:37:07 +0000174 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
175
Matt Arsenaultc4d3d3a2014-06-23 18:00:49 +0000176 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
177 for (MVT VT : ScalarIntVTs) {
178 setOperationAction(ISD::ADDC, VT, Expand);
179 setOperationAction(ISD::SUBC, VT, Expand);
180 setOperationAction(ISD::ADDE, VT, Expand);
181 setOperationAction(ISD::SUBE, VT, Expand);
182 }
183
Tom Stellardb852af52013-03-08 15:37:03 +0000184 setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard87047f62013-04-24 23:56:18 +0000185 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
Tom Stellardfc455472013-08-12 22:33:21 +0000186 setSchedulingPreference(Sched::Source);
Tom Stellard75aadc22012-12-11 21:25:42 +0000187}
188
/// Expand pseudo-instructions that were marked usesCustomInserter into real
/// machine instructions after instruction selection. Returns the (possibly
/// unchanged) basic block containing the expansion. Unless a case returns
/// early, the original pseudo MI is erased before returning.
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // Replace LDS_*_RET instruction that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI->getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // If the destination register is actually used, keep the _RET form.
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()))
        return BB;

      // Rebuild as the NORET variant, copying every operand after dst.
      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      // Not an LDS pseudo: defer to the generic AMDGPU expansion.
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  // CLAMP/FABS/FNEG become a plain MOV carrying the matching modifier flag.
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    // Mark the defining instruction of the masked register instead of
    // emitting anything for the MASK_WRITE pseudo itself.
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    // Materialize the float immediate via its raw bit pattern.
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    // Copy from the constant file: a MOV whose src0 selects the constant slot.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
                       MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    // Set the End-Of-Program bit when this write immediately precedes RETURN.
    unsigned EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    // Textured sample with explicit derivatives: emit the two gradient
    // setup instructions, then the sample that implicitly reads them.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    // Adjust the source swizzle and coordinate-type bits per texture target.
    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    // Same expansion as TXD, but using the shadow-comparison sample opcode.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    // Unconditional branch lowers directly to JUMP.
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    // Float condition: PRED_X sets the predicate bit (pushed on the flow
    // stack), then JUMP_COND consumes and kills it.
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
                .addOperand(MI->getOperand(1))
                .addImm(OPCODE_IS_NOT_ZERO)
                .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    // Integer condition: same shape as the f32 case, integer compare opcode.
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
                .addOperand(MI->getOperand(1))
                .addImm(OPCODE_IS_NOT_ZERO_INT)
                .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if its not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    // Only the final export (or one followed by RETURN) gets rewritten with
    // the CF instruction encoding and the End-Of-Program bit.
    bool EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  // The pseudo has been fully expanded; remove it.
  MI->eraseFromParent();
  return BB;
}
565
566//===----------------------------------------------------------------------===//
567// Custom DAG Lowering Operations
568//===----------------------------------------------------------------------===//
569
Tom Stellard75aadc22012-12-11 21:25:42 +0000570SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000571 MachineFunction &MF = DAG.getMachineFunction();
572 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard75aadc22012-12-11 21:25:42 +0000573 switch (Op.getOpcode()) {
574 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Tom Stellard880a80a2014-06-17 16:53:14 +0000575 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
576 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
Jan Vesely25f36272014-06-18 12:27:13 +0000577 case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
Jan Veselyecf51332014-06-18 12:27:17 +0000578 case ISD::SRA_PARTS:
Jan Vesely900ff2e2014-06-18 12:27:15 +0000579 case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000580 case ISD::FCOS:
581 case ISD::FSIN: return LowerTrig(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000582 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000583 case ISD::STORE: return LowerSTORE(Op, DAG);
Matt Arsenaultd2c9e082014-07-07 18:34:45 +0000584 case ISD::LOAD: {
585 SDValue Result = LowerLOAD(Op, DAG);
586 assert((!Result.getNode() ||
587 Result.getNode()->getNumValues() == 2) &&
588 "Load should return a value and a chain");
589 return Result;
590 }
591
Matt Arsenault1d555c42014-06-23 18:00:55 +0000592 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000593 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000594 case ISD::INTRINSIC_VOID: {
595 SDValue Chain = Op.getOperand(0);
596 unsigned IntrinsicID =
597 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
598 switch (IntrinsicID) {
599 case AMDGPUIntrinsic::AMDGPU_store_output: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000600 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
601 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000602 MFI->LiveOuts.push_back(Reg);
Andrew Trickef9de2a2013-05-25 02:42:55 +0000603 return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
Tom Stellard75aadc22012-12-11 21:25:42 +0000604 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000605 case AMDGPUIntrinsic::R600_store_swizzle: {
606 const SDValue Args[8] = {
607 Chain,
608 Op.getOperand(2), // Export Value
609 Op.getOperand(3), // ArrayBase
610 Op.getOperand(4), // Type
611 DAG.getConstant(0, MVT::i32), // SWZ_X
612 DAG.getConstant(1, MVT::i32), // SWZ_Y
613 DAG.getConstant(2, MVT::i32), // SWZ_Z
614 DAG.getConstant(3, MVT::i32) // SWZ_W
615 };
Craig Topper48d114b2014-04-26 18:35:24 +0000616 return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(), Args);
Tom Stellard75aadc22012-12-11 21:25:42 +0000617 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000618
Tom Stellard75aadc22012-12-11 21:25:42 +0000619 // default for switch(IntrinsicID)
620 default: break;
621 }
622 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
623 break;
624 }
625 case ISD::INTRINSIC_WO_CHAIN: {
626 unsigned IntrinsicID =
627 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
628 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000629 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000630 switch(IntrinsicID) {
631 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000632 case AMDGPUIntrinsic::R600_load_input: {
633 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
634 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
635 MachineFunction &MF = DAG.getMachineFunction();
636 MachineRegisterInfo &MRI = MF.getRegInfo();
637 MRI.addLiveIn(Reg);
638 return DAG.getCopyFromReg(DAG.getEntryNode(),
639 SDLoc(DAG.getEntryNode()), Reg, VT);
640 }
641
642 case AMDGPUIntrinsic::R600_interp_input: {
643 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
644 int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
645 MachineSDNode *interp;
646 if (ijb < 0) {
647 const MachineFunction &MF = DAG.getMachineFunction();
648 const R600InstrInfo *TII =
649 static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
650 interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
651 MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
652 return DAG.getTargetExtractSubreg(
653 TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
654 DL, MVT::f32, SDValue(interp, 0));
655 }
656 MachineFunction &MF = DAG.getMachineFunction();
657 MachineRegisterInfo &MRI = MF.getRegInfo();
658 unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
659 unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
660 MRI.addLiveIn(RegisterI);
661 MRI.addLiveIn(RegisterJ);
662 SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
663 SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
664 SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
665 SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);
666
667 if (slot % 4 < 2)
668 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
669 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
670 RegisterJNode, RegisterINode);
671 else
672 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
673 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
674 RegisterJNode, RegisterINode);
675 return SDValue(interp, slot % 2);
676 }
Vincent Lejeunef143af32013-11-11 22:10:24 +0000677 case AMDGPUIntrinsic::R600_interp_xy:
678 case AMDGPUIntrinsic::R600_interp_zw: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000679 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Tom Stellard41afe6a2013-02-05 17:09:14 +0000680 MachineSDNode *interp;
Vincent Lejeunef143af32013-11-11 22:10:24 +0000681 SDValue RegisterINode = Op.getOperand(2);
682 SDValue RegisterJNode = Op.getOperand(3);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000683
Vincent Lejeunef143af32013-11-11 22:10:24 +0000684 if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
Tom Stellard41afe6a2013-02-05 17:09:14 +0000685 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
Vincent Lejeunef143af32013-11-11 22:10:24 +0000686 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000687 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000688 else
689 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
Vincent Lejeunef143af32013-11-11 22:10:24 +0000690 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000691 RegisterJNode, RegisterINode);
Vincent Lejeunef143af32013-11-11 22:10:24 +0000692 return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
693 SDValue(interp, 0), SDValue(interp, 1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000694 }
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000695 case AMDGPUIntrinsic::R600_tex:
696 case AMDGPUIntrinsic::R600_texc:
697 case AMDGPUIntrinsic::R600_txl:
698 case AMDGPUIntrinsic::R600_txlc:
699 case AMDGPUIntrinsic::R600_txb:
700 case AMDGPUIntrinsic::R600_txbc:
701 case AMDGPUIntrinsic::R600_txf:
702 case AMDGPUIntrinsic::R600_txq:
703 case AMDGPUIntrinsic::R600_ddx:
Vincent Lejeune6df39432013-10-02 16:00:33 +0000704 case AMDGPUIntrinsic::R600_ddy:
705 case AMDGPUIntrinsic::R600_ldptr: {
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000706 unsigned TextureOp;
707 switch (IntrinsicID) {
708 case AMDGPUIntrinsic::R600_tex:
709 TextureOp = 0;
710 break;
711 case AMDGPUIntrinsic::R600_texc:
712 TextureOp = 1;
713 break;
714 case AMDGPUIntrinsic::R600_txl:
715 TextureOp = 2;
716 break;
717 case AMDGPUIntrinsic::R600_txlc:
718 TextureOp = 3;
719 break;
720 case AMDGPUIntrinsic::R600_txb:
721 TextureOp = 4;
722 break;
723 case AMDGPUIntrinsic::R600_txbc:
724 TextureOp = 5;
725 break;
726 case AMDGPUIntrinsic::R600_txf:
727 TextureOp = 6;
728 break;
729 case AMDGPUIntrinsic::R600_txq:
730 TextureOp = 7;
731 break;
732 case AMDGPUIntrinsic::R600_ddx:
733 TextureOp = 8;
734 break;
735 case AMDGPUIntrinsic::R600_ddy:
736 TextureOp = 9;
737 break;
Vincent Lejeune6df39432013-10-02 16:00:33 +0000738 case AMDGPUIntrinsic::R600_ldptr:
739 TextureOp = 10;
740 break;
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000741 default:
742 llvm_unreachable("Unknow Texture Operation");
743 }
744
745 SDValue TexArgs[19] = {
746 DAG.getConstant(TextureOp, MVT::i32),
747 Op.getOperand(1),
748 DAG.getConstant(0, MVT::i32),
749 DAG.getConstant(1, MVT::i32),
750 DAG.getConstant(2, MVT::i32),
751 DAG.getConstant(3, MVT::i32),
752 Op.getOperand(2),
753 Op.getOperand(3),
754 Op.getOperand(4),
755 DAG.getConstant(0, MVT::i32),
756 DAG.getConstant(1, MVT::i32),
757 DAG.getConstant(2, MVT::i32),
758 DAG.getConstant(3, MVT::i32),
759 Op.getOperand(5),
760 Op.getOperand(6),
761 Op.getOperand(7),
762 Op.getOperand(8),
763 Op.getOperand(9),
764 Op.getOperand(10)
765 };
Craig Topper48d114b2014-04-26 18:35:24 +0000766 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000767 }
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000768 case AMDGPUIntrinsic::AMDGPU_dp4: {
769 SDValue Args[8] = {
770 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
771 DAG.getConstant(0, MVT::i32)),
772 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
773 DAG.getConstant(0, MVT::i32)),
774 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
775 DAG.getConstant(1, MVT::i32)),
776 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
777 DAG.getConstant(1, MVT::i32)),
778 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
779 DAG.getConstant(2, MVT::i32)),
780 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
781 DAG.getConstant(2, MVT::i32)),
782 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
783 DAG.getConstant(3, MVT::i32)),
784 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
785 DAG.getConstant(3, MVT::i32))
786 };
Craig Topper48d114b2014-04-26 18:35:24 +0000787 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000788 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000789
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000790 case Intrinsic::r600_read_ngroups_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000791 return LowerImplicitParameter(DAG, VT, DL, 0);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000792 case Intrinsic::r600_read_ngroups_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000793 return LowerImplicitParameter(DAG, VT, DL, 1);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000794 case Intrinsic::r600_read_ngroups_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000795 return LowerImplicitParameter(DAG, VT, DL, 2);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000796 case Intrinsic::r600_read_global_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000797 return LowerImplicitParameter(DAG, VT, DL, 3);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000798 case Intrinsic::r600_read_global_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000799 return LowerImplicitParameter(DAG, VT, DL, 4);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000800 case Intrinsic::r600_read_global_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000801 return LowerImplicitParameter(DAG, VT, DL, 5);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000802 case Intrinsic::r600_read_local_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000803 return LowerImplicitParameter(DAG, VT, DL, 6);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000804 case Intrinsic::r600_read_local_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000805 return LowerImplicitParameter(DAG, VT, DL, 7);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000806 case Intrinsic::r600_read_local_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000807 return LowerImplicitParameter(DAG, VT, DL, 8);
808
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000809 case Intrinsic::r600_read_tgid_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000810 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
811 AMDGPU::T1_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000812 case Intrinsic::r600_read_tgid_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000813 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
814 AMDGPU::T1_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000815 case Intrinsic::r600_read_tgid_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000816 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
817 AMDGPU::T1_Z, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000818 case Intrinsic::r600_read_tidig_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000819 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
820 AMDGPU::T0_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000821 case Intrinsic::r600_read_tidig_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000822 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
823 AMDGPU::T0_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000824 case Intrinsic::r600_read_tidig_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000825 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
826 AMDGPU::T0_Z, VT);
Matt Arsenault257d48d2014-06-24 22:13:39 +0000827 case Intrinsic::AMDGPU_rsq:
828 // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
829 return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000830 }
831 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
832 break;
833 }
834 } // end switch(Op.getOpcode())
835 return SDValue();
836}
837
838void R600TargetLowering::ReplaceNodeResults(SDNode *N,
839 SmallVectorImpl<SDValue> &Results,
840 SelectionDAG &DAG) const {
841 switch (N->getOpcode()) {
Matt Arsenaultd125d742014-03-27 17:23:24 +0000842 default:
843 AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
844 return;
Jan Vesely2cb62ce2014-07-10 22:40:21 +0000845 case ISD::FP_TO_UINT:
846 if (N->getValueType(0) == MVT::i1) {
847 Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
848 return;
849 }
850 // Fall-through. Since we don't care about out of bounds values
851 // we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint
852 // considers some extra cases which are not necessary here.
853 case ISD::FP_TO_SINT: {
854 SDValue Result;
855 if (expandFP_TO_SINT(N, Result, DAG))
856 Results.push_back(Result);
Tom Stellard365366f2013-01-23 02:09:06 +0000857 return;
Jan Vesely2cb62ce2014-07-10 22:40:21 +0000858 }
Jan Vesely343cd6f02014-06-22 21:43:01 +0000859 case ISD::UDIV: {
860 SDValue Op = SDValue(N, 0);
861 SDLoc DL(Op);
862 EVT VT = Op.getValueType();
863 SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT),
864 N->getOperand(0), N->getOperand(1));
865 Results.push_back(UDIVREM);
866 break;
867 }
868 case ISD::UREM: {
869 SDValue Op = SDValue(N, 0);
870 SDLoc DL(Op);
871 EVT VT = Op.getValueType();
872 SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT),
873 N->getOperand(0), N->getOperand(1));
874 Results.push_back(UDIVREM.getValue(1));
875 break;
876 }
877 case ISD::SDIV: {
878 SDValue Op = SDValue(N, 0);
879 SDLoc DL(Op);
880 EVT VT = Op.getValueType();
881 SDValue SDIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(VT, VT),
882 N->getOperand(0), N->getOperand(1));
883 Results.push_back(SDIVREM);
884 break;
885 }
886 case ISD::SREM: {
887 SDValue Op = SDValue(N, 0);
888 SDLoc DL(Op);
889 EVT VT = Op.getValueType();
890 SDValue SDIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(VT, VT),
891 N->getOperand(0), N->getOperand(1));
892 Results.push_back(SDIVREM.getValue(1));
893 break;
894 }
895 case ISD::SDIVREM: {
896 SDValue Op = SDValue(N, 1);
897 SDValue RES = LowerSDIVREM(Op, DAG);
898 Results.push_back(RES);
899 Results.push_back(RES.getValue(1));
900 break;
901 }
902 case ISD::UDIVREM: {
903 SDValue Op = SDValue(N, 0);
904 SDLoc DL(Op);
905 EVT VT = Op.getValueType();
906 EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
907
908 SDValue one = DAG.getConstant(1, HalfVT);
909 SDValue zero = DAG.getConstant(0, HalfVT);
910
911 //HiLo split
912 SDValue LHS = N->getOperand(0);
913 SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, zero);
914 SDValue LHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, one);
915
916 SDValue RHS = N->getOperand(1);
917 SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, zero);
918 SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, one);
919
920 // Get Speculative values
921 SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo);
922 SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo);
923
924 SDValue REM_Hi = zero;
925 SDValue REM_Lo = DAG.getSelectCC(DL, RHS_Hi, zero, REM_Part, LHS_Hi, ISD::SETEQ);
926
927 SDValue DIV_Hi = DAG.getSelectCC(DL, RHS_Hi, zero, DIV_Part, zero, ISD::SETEQ);
928 SDValue DIV_Lo = zero;
929
930 const unsigned halfBitWidth = HalfVT.getSizeInBits();
931
932 for (unsigned i = 0; i < halfBitWidth; ++i) {
933 SDValue POS = DAG.getConstant(halfBitWidth - i - 1, HalfVT);
934 // Get Value of high bit
935 SDValue HBit;
936 if (halfBitWidth == 32 && Subtarget->hasBFE()) {
937 HBit = DAG.getNode(AMDGPUISD::BFE_U32, DL, HalfVT, LHS_Lo, POS, one);
938 } else {
939 HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS);
940 HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one);
941 }
942
943 SDValue Carry = DAG.getNode(ISD::SRL, DL, HalfVT, REM_Lo,
944 DAG.getConstant(halfBitWidth - 1, HalfVT));
945 REM_Hi = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Hi, one);
946 REM_Hi = DAG.getNode(ISD::OR, DL, HalfVT, REM_Hi, Carry);
947
948 REM_Lo = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Lo, one);
949 REM_Lo = DAG.getNode(ISD::OR, DL, HalfVT, REM_Lo, HBit);
950
951
952 SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
953
954 SDValue BIT = DAG.getConstant(1 << (halfBitWidth - i - 1), HalfVT);
955 SDValue realBIT = DAG.getSelectCC(DL, REM, RHS, BIT, zero, ISD::SETGE);
956
957 DIV_Lo = DAG.getNode(ISD::OR, DL, HalfVT, DIV_Lo, realBIT);
958
959 // Update REM
960
961 SDValue REM_sub = DAG.getNode(ISD::SUB, DL, VT, REM, RHS);
962
963 REM = DAG.getSelectCC(DL, REM, RHS, REM_sub, REM, ISD::SETGE);
964 REM_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, zero);
965 REM_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, one);
966 }
967
968 SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
969 SDValue DIV = DAG.getNode(ISD::BUILD_PAIR, DL, VT, DIV_Lo, DIV_Hi);
970 Results.push_back(DIV);
971 Results.push_back(REM);
972 break;
973 }
974 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000975}
976
Tom Stellard880a80a2014-06-17 16:53:14 +0000977SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
978 SDValue Vector) const {
979
980 SDLoc DL(Vector);
981 EVT VecVT = Vector.getValueType();
982 EVT EltVT = VecVT.getVectorElementType();
983 SmallVector<SDValue, 8> Args;
984
985 for (unsigned i = 0, e = VecVT.getVectorNumElements();
986 i != e; ++i) {
987 Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
988 Vector, DAG.getConstant(i, getVectorIdxTy())));
989 }
990
991 return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
992}
993
994SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
995 SelectionDAG &DAG) const {
996
997 SDLoc DL(Op);
998 SDValue Vector = Op.getOperand(0);
999 SDValue Index = Op.getOperand(1);
1000
1001 if (isa<ConstantSDNode>(Index) ||
1002 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
1003 return Op;
1004
1005 Vector = vectorToVerticalVector(DAG, Vector);
1006 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
1007 Vector, Index);
1008}
1009
1010SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
1011 SelectionDAG &DAG) const {
1012 SDLoc DL(Op);
1013 SDValue Vector = Op.getOperand(0);
1014 SDValue Value = Op.getOperand(1);
1015 SDValue Index = Op.getOperand(2);
1016
1017 if (isa<ConstantSDNode>(Index) ||
1018 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
1019 return Op;
1020
1021 Vector = vectorToVerticalVector(DAG, Vector);
1022 SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
1023 Vector, Value, Index);
1024 return vectorToVerticalVector(DAG, Insert);
1025}
1026
Vincent Lejeuneb55940c2013-07-09 15:03:11 +00001027SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
1028 // On hw >= R700, COS/SIN input must be between -1. and 1.
1029 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
1030 EVT VT = Op.getValueType();
1031 SDValue Arg = Op.getOperand(0);
1032 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
1033 DAG.getNode(ISD::FADD, SDLoc(Op), VT,
1034 DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
1035 DAG.getConstantFP(0.15915494309, MVT::f32)),
1036 DAG.getConstantFP(0.5, MVT::f32)));
1037 unsigned TrigNode;
1038 switch (Op.getOpcode()) {
1039 case ISD::FCOS:
1040 TrigNode = AMDGPUISD::COS_HW;
1041 break;
1042 case ISD::FSIN:
1043 TrigNode = AMDGPUISD::SIN_HW;
1044 break;
1045 default:
1046 llvm_unreachable("Wrong trig opcode");
1047 }
1048 SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
1049 DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
1050 DAG.getConstantFP(-0.5, MVT::f32)));
1051 if (Gen >= AMDGPUSubtarget::R700)
1052 return TrigVal;
1053 // On R600 hw, COS/SIN input must be between -Pi and Pi.
1054 return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
1055 DAG.getConstantFP(3.14159265359, MVT::f32));
1056}
1057
Jan Vesely25f36272014-06-18 12:27:13 +00001058SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
1059 SDLoc DL(Op);
1060 EVT VT = Op.getValueType();
1061
1062 SDValue Lo = Op.getOperand(0);
1063 SDValue Hi = Op.getOperand(1);
1064 SDValue Shift = Op.getOperand(2);
1065 SDValue Zero = DAG.getConstant(0, VT);
1066 SDValue One = DAG.getConstant(1, VT);
1067
1068 SDValue Width = DAG.getConstant(VT.getSizeInBits(), VT);
1069 SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT);
1070 SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
1071 SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
1072
1073 // The dance around Width1 is necessary for 0 special case.
1074 // Without it the CompShift might be 32, producing incorrect results in
1075 // Overflow. So we do the shift in two steps, the alternative is to
1076 // add a conditional to filter the special case.
1077
1078 SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
1079 Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);
1080
1081 SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
1082 HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
1083 SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);
1084
1085 SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
1086 SDValue LoBig = Zero;
1087
1088 Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
1089 Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
1090
1091 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
1092}
1093
Jan Vesely900ff2e2014-06-18 12:27:15 +00001094SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
1095 SDLoc DL(Op);
1096 EVT VT = Op.getValueType();
1097
1098 SDValue Lo = Op.getOperand(0);
1099 SDValue Hi = Op.getOperand(1);
1100 SDValue Shift = Op.getOperand(2);
1101 SDValue Zero = DAG.getConstant(0, VT);
1102 SDValue One = DAG.getConstant(1, VT);
1103
Jan Veselyecf51332014-06-18 12:27:17 +00001104 const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;
1105
Jan Vesely900ff2e2014-06-18 12:27:15 +00001106 SDValue Width = DAG.getConstant(VT.getSizeInBits(), VT);
1107 SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT);
1108 SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
1109 SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
1110
1111 // The dance around Width1 is necessary for 0 special case.
1112 // Without it the CompShift might be 32, producing incorrect results in
1113 // Overflow. So we do the shift in two steps, the alternative is to
1114 // add a conditional to filter the special case.
1115
1116 SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
1117 Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);
1118
Jan Veselyecf51332014-06-18 12:27:17 +00001119 SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
Jan Vesely900ff2e2014-06-18 12:27:15 +00001120 SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
1121 LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);
1122
Jan Veselyecf51332014-06-18 12:27:17 +00001123 SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
1124 SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;
Jan Vesely900ff2e2014-06-18 12:27:15 +00001125
1126 Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
1127 Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
1128
1129 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
1130}
1131
Tom Stellard75aadc22012-12-11 21:25:42 +00001132SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
1133 return DAG.getNode(
1134 ISD::SETCC,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001135 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +00001136 MVT::i1,
1137 Op, DAG.getConstantFP(0.0f, MVT::f32),
1138 DAG.getCondCode(ISD::SETNE)
1139 );
1140}
1141
Tom Stellard75aadc22012-12-11 21:25:42 +00001142SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001143 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001144 unsigned DwordOffset) const {
1145 unsigned ByteOffset = DwordOffset * 4;
1146 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +00001147 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +00001148
1149 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
1150 assert(isInt<16>(ByteOffset));
1151
1152 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
1153 DAG.getConstant(ByteOffset, MVT::i32), // PTR
1154 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
1155 false, false, false, 0);
1156}
1157
Tom Stellard75aadc22012-12-11 21:25:42 +00001158bool R600TargetLowering::isZero(SDValue Op) const {
1159 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
1160 return Cst->isNullValue();
1161 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
1162 return CstFP->isZero();
1163 } else {
1164 return false;
1165 }
1166}
1167
/// Custom lowering for SELECT_CC. Tries, in order:
///  1. SET*  - hardware true/false constants as the select results,
///  2. CND*  - a comparison against zero (moving the zero to the RHS and
///             adjusting the condition code as needed),
///  3. otherwise, splits the node into two supported SELECT_CC operations.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  // If True/False are reversed (hardware false in the True slot), invert the
  // condition instead; if the inverted condition is not legal, additionally
  // swap the compare operands and use the swapped inverse.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
     ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        // Inverting the condition also swaps which operand is selected.
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    // CND* has no not-equal form: invert the condition and swap the
    // selected values instead.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
1302
Alp Tokercb402912014-01-24 17:20:08 +00001303/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001304/// convert these pointers to a register index. Each register holds
1305/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
1306/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
1307/// for indirect addressing.
1308SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1309 unsigned StackWidth,
1310 SelectionDAG &DAG) const {
1311 unsigned SRLPad;
1312 switch(StackWidth) {
1313 case 1:
1314 SRLPad = 2;
1315 break;
1316 case 2:
1317 SRLPad = 3;
1318 break;
1319 case 4:
1320 SRLPad = 4;
1321 break;
1322 default: llvm_unreachable("Invalid stack width");
1323 }
1324
Andrew Trickef9de2a2013-05-25 02:42:55 +00001325 return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001326 DAG.getConstant(SRLPad, MVT::i32));
1327}
1328
1329void R600TargetLowering::getStackAddress(unsigned StackWidth,
1330 unsigned ElemIdx,
1331 unsigned &Channel,
1332 unsigned &PtrIncr) const {
1333 switch (StackWidth) {
1334 default:
1335 case 1:
1336 Channel = 0;
1337 if (ElemIdx > 0) {
1338 PtrIncr = 1;
1339 } else {
1340 PtrIncr = 0;
1341 }
1342 break;
1343 case 2:
1344 Channel = ElemIdx % 2;
1345 if (ElemIdx == 2) {
1346 PtrIncr = 1;
1347 } else {
1348 PtrIncr = 0;
1349 }
1350 break;
1351 case 4:
1352 Channel = ElemIdx;
1353 PtrIncr = 0;
1354 break;
1355 }
1356}
1357
Tom Stellard75aadc22012-12-11 21:25:42 +00001358SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001359 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001360 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1361 SDValue Chain = Op.getOperand(0);
1362 SDValue Value = Op.getOperand(1);
1363 SDValue Ptr = Op.getOperand(2);
1364
Tom Stellard2ffc3302013-08-26 15:05:44 +00001365 SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
Tom Stellardfbab8272013-08-16 01:12:11 +00001366 if (Result.getNode()) {
1367 return Result;
1368 }
1369
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001370 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
1371 if (StoreNode->isTruncatingStore()) {
1372 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001373 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001374 EVT MemVT = StoreNode->getMemoryVT();
1375 SDValue MaskConstant;
1376 if (MemVT == MVT::i8) {
1377 MaskConstant = DAG.getConstant(0xFF, MVT::i32);
1378 } else {
1379 assert(MemVT == MVT::i16);
1380 MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
1381 }
1382 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
1383 DAG.getConstant(2, MVT::i32));
1384 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
1385 DAG.getConstant(0x00000003, VT));
1386 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1387 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
1388 DAG.getConstant(3, VT));
1389 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1390 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
1391 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1392 // vector instead.
1393 SDValue Src[4] = {
1394 ShiftedValue,
1395 DAG.getConstant(0, MVT::i32),
1396 DAG.getConstant(0, MVT::i32),
1397 Mask
1398 };
Craig Topper48d114b2014-04-26 18:35:24 +00001399 SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001400 SDValue Args[3] = { Chain, Input, DWordAddr };
1401 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
Craig Topper206fcd42014-04-26 19:29:41 +00001402 Op->getVTList(), Args, MemVT,
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001403 StoreNode->getMemOperand());
1404 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
1405 Value.getValueType().bitsGE(MVT::i32)) {
1406 // Convert pointer from byte address to dword address.
1407 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1408 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
1409 Ptr, DAG.getConstant(2, MVT::i32)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001410
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001411 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
Matt Arsenaulteaa3a7e2013-12-10 21:37:42 +00001412 llvm_unreachable("Truncated and indexed stores not supported yet");
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001413 } else {
1414 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1415 }
1416 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001417 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001418 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001419
1420 EVT ValueVT = Value.getValueType();
1421
1422 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1423 return SDValue();
1424 }
1425
Tom Stellarde9373602014-01-22 19:24:14 +00001426 SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
1427 if (Ret.getNode()) {
1428 return Ret;
1429 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001430 // Lowering for indirect addressing
1431
1432 const MachineFunction &MF = DAG.getMachineFunction();
1433 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1434 getTargetMachine().getFrameLowering());
1435 unsigned StackWidth = TFL->getStackWidth(MF);
1436
1437 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1438
1439 if (ValueVT.isVector()) {
1440 unsigned NumElemVT = ValueVT.getVectorNumElements();
1441 EVT ElemVT = ValueVT.getVectorElementType();
Craig Topper48d114b2014-04-26 18:35:24 +00001442 SmallVector<SDValue, 4> Stores(NumElemVT);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001443
1444 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1445 "vector width in load");
1446
1447 for (unsigned i = 0; i < NumElemVT; ++i) {
1448 unsigned Channel, PtrIncr;
1449 getStackAddress(StackWidth, i, Channel, PtrIncr);
1450 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1451 DAG.getConstant(PtrIncr, MVT::i32));
1452 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
1453 Value, DAG.getConstant(i, MVT::i32));
1454
1455 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1456 Chain, Elem, Ptr,
1457 DAG.getTargetConstant(Channel, MVT::i32));
1458 }
Craig Topper48d114b2014-04-26 18:35:24 +00001459 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001460 } else {
1461 if (ValueVT == MVT::i8) {
1462 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1463 }
1464 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
NAKAMURA Takumi18ca09c2013-05-22 06:37:25 +00001465 DAG.getTargetConstant(0, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001466 }
1467
1468 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001469}
1470
Tom Stellard365366f2013-01-23 02:09:06 +00001471// return (512 + (kc_bank << 12)
1472static int
1473ConstantAddressBlock(unsigned AddressSpace) {
1474 switch (AddressSpace) {
1475 case AMDGPUAS::CONSTANT_BUFFER_0:
1476 return 512;
1477 case AMDGPUAS::CONSTANT_BUFFER_1:
1478 return 512 + 4096;
1479 case AMDGPUAS::CONSTANT_BUFFER_2:
1480 return 512 + 4096 * 2;
1481 case AMDGPUAS::CONSTANT_BUFFER_3:
1482 return 512 + 4096 * 3;
1483 case AMDGPUAS::CONSTANT_BUFFER_4:
1484 return 512 + 4096 * 4;
1485 case AMDGPUAS::CONSTANT_BUFFER_5:
1486 return 512 + 4096 * 5;
1487 case AMDGPUAS::CONSTANT_BUFFER_6:
1488 return 512 + 4096 * 6;
1489 case AMDGPUAS::CONSTANT_BUFFER_7:
1490 return 512 + 4096 * 7;
1491 case AMDGPUAS::CONSTANT_BUFFER_8:
1492 return 512 + 4096 * 8;
1493 case AMDGPUAS::CONSTANT_BUFFER_9:
1494 return 512 + 4096 * 9;
1495 case AMDGPUAS::CONSTANT_BUFFER_10:
1496 return 512 + 4096 * 10;
1497 case AMDGPUAS::CONSTANT_BUFFER_11:
1498 return 512 + 4096 * 11;
1499 case AMDGPUAS::CONSTANT_BUFFER_12:
1500 return 512 + 4096 * 12;
1501 case AMDGPUAS::CONSTANT_BUFFER_13:
1502 return 512 + 4096 * 13;
1503 case AMDGPUAS::CONSTANT_BUFFER_14:
1504 return 512 + 4096 * 14;
1505 case AMDGPUAS::CONSTANT_BUFFER_15:
1506 return 512 + 4096 * 15;
1507 default:
1508 return -1;
1509 }
1510}
1511
/// Custom lowering for ISD::LOAD. Tries the generic AMDGPU lowering first,
/// then handles, in order: constant-address loads of global variables,
/// vector loads from local memory (scalarized), constant-buffer loads,
/// sign-extending loads (expanded to zext-load + shl + sra), and finally
/// private-address loads via indirect register addressing.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  // Give the target-independent AMDGPU lowering the first chance; if it
  // produced a value, merge it with the original chain and return.
  SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
  if (Ret.getNode()) {
    SDValue Ops[2] = {
      Ret,
      Chain
    };
    return DAG.getMergeValues(Ops, DL);
  }

  // Lower constant address space loads of global variables. The pointer is
  // truncated/extended to the private-address pointer width, then converted
  // from a byte offset to a dword register index (shift right by 2).
  if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
      isa<GlobalVariable>(
          GetUnderlyingObject(LoadNode->getMemOperand()->getValue()))) {

    SDValue Ptr = DAG.getZExtOrTrunc(LoadNode->getBasePtr(), DL,
        getPointerTy(AMDGPUAS::PRIVATE_ADDRESS));
    Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
        DAG.getConstant(2, MVT::i32));
    return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op->getVTList(),
                       LoadNode->getChain(), Ptr,
                       DAG.getTargetConstant(0, MVT::i32), Op.getOperand(2));
  }

  // Vector loads from local memory are broken up into scalar loads.
  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      ScalarizeVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Constant-buffer loads (non-extending or zero-extending only).
  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
        isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT,
                           makeArrayRef(Slots, NumElements));
    } else {
      // A non-constant pointer can't be folded; keep it as a v4i32 load
      // indexed by the pointer divided by 16 (SRL by 4).
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    // Scalar loads read a full v4i32 slot; extract element 0.
    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
          DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    SDValue ShiftAmount =
        DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
    // Expand to: (sra (shl (extload x), amt), amt) which sign-extends the
    // loaded value in-register.
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                  LoadNode->getPointerInfo(), MemVT,
                                  LoadNode->isVolatile(),
                                  LoadNode->isNonTemporal(),
                                  LoadNode->isInvariant(),
                                  LoadNode->getAlignment());
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);

    SDValue MergedValues[2] = { Sra, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Everything below only applies to private (stack) loads.
  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  // Convert the byte pointer into a register index.
  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    // Emit one REGISTER_LOAD per element, then pad with undef up to 4 and
    // rebuild the vector.
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, MVT::i32),
                             Op.getOperand(2));
    }
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2] = {
    LoweredLoad,
    Chain
  };

  return DAG.getMergeValues(Ops, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001674
Matt Arsenault1d555c42014-06-23 18:00:55 +00001675SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1676 SDValue Chain = Op.getOperand(0);
1677 SDValue Cond = Op.getOperand(1);
1678 SDValue Jump = Op.getOperand(2);
1679
1680 return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1681 Chain, Jump, Cond);
1682}
1683
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
///
/// For compute shaders, arguments are materialized as sign-extending loads
/// from CONSTANT_BUFFER_0 starting at byte offset 36; for all other shader
/// types each argument arrives pre-loaded in a 128-bit register.
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      SDLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned ShaderType = MF.getInfo<R600MachineFunctionInfo>()->getShaderType();

  SmallVector<ISD::InputArg, 8> LocalIns;

  // Recover the pre-legalization argument types so MemVT below reflects the
  // IR-level argument, not the legalized register type.
  getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);

  AnalyzeFormalArguments(CCInfo, LocalIns);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    EVT VT = Ins[i].VT;
    EVT MemVT = LocalIns[i].VT;

    // Non-compute shaders receive each argument in a live-in 128-bit register.
    if (ShaderType != ShaderType::COMPUTE) {
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
    // being invalid. Somehow this seems to work with i64 arguments, but breaks
    // for <1 x i64>.

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.

    // FIXME: This should really check the extload type, but the handling of
    // extload vector parameters seems to be broken.
    //ISD::LoadExtType Ext = Ins[i].Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
    ISD::LoadExtType Ext = ISD::SEXTLOAD;
    SDValue Arg = DAG.getExtLoad(Ext, DL, VT, Chain,
                                 DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
                                 MachinePointerInfo(UndefValue::get(PtrTy)),
                                 MemVT, false, false, false, 4);

    // 4 is the preferred alignment for the CONSTANT memory space.
    InVals.push_back(Arg);
  }
  return Chain;
}
1743
Matt Arsenault758659232013-05-18 00:21:46 +00001744EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001745 if (!VT.isVector())
1746 return MVT::i32;
Tom Stellard75aadc22012-12-11 21:25:42 +00001747 return VT.changeVectorElementTypeToInteger();
1748}
1749
/// Rewrite a BUILD_VECTOR so that elements expressible as hardware swizzle
/// selects are replaced by undef, recording the select code in RemapSwizzle:
/// undef elements become SEL_MASK_WRITE (7), constant 0.0 becomes SEL_0 (4),
/// constant 1.0 becomes SEL_1 (5), and a duplicate of an earlier element is
/// remapped to that earlier element's index. Returns the compacted
/// BUILD_VECTOR; RemapSwizzle holds the old-index -> select-code mapping.
static SDValue CompactSwizzlableVector(
  SelectionDAG &DAG, SDValue VectorEntry,
  DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      // We mask write here to teach later passes that the ith element of this
      // vector is undef. Thus we can use it to reduce 128 bits reg usage,
      // break false dependencies and additionally make assembly easier to read.
      RemapSwizzle[i] = 7; // SEL_MASK_WRITE
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    // Elements already turned into undef (masked or folded above) must not
    // take part in the duplicate scan below.
    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      continue;
    // Deduplicate: if this element repeats an earlier one, drop it and point
    // the swizzle at the first occurrence.
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec);
}
1792
/// Try to move one EXTRACT_VECTOR_ELT element of a BUILD_VECTOR into the
/// lane matching its source index, swapping it with whatever occupied that
/// lane. Elements already in their matching lane are marked unmovable and
/// left in place. At most one swap is performed per call (the loop breaks
/// after the first). RemapSwizzle records the resulting old -> new lane map
/// (identity where nothing moved).
static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
                                DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };
  bool isUnmovable[4] = { false, false, false, false };
  // First pass: initialize the identity mapping and mark lanes whose extract
  // index already equals their position — those must stay put.
  for (unsigned i = 0; i < 4; i++) {
    RemapSwizzle[i] = i;
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
          ->getZExtValue();
      if (i == Idx)
        isUnmovable[Idx] = true;
    }
  }

  // Second pass: move the first movable extract into its source lane.
  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
          ->getZExtValue();
      if (isUnmovable[Idx])
        continue;
      // Swap i and Idx
      std::swap(NewBldVec[Idx], NewBldVec[i]);
      std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
      break;
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec);
}
1830
1831
1832SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1833SDValue Swz[4], SelectionDAG &DAG) const {
1834 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1835 // Old -> New swizzle values
1836 DenseMap<unsigned, unsigned> SwizzleRemap;
1837
1838 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1839 for (unsigned i = 0; i < 4; i++) {
1840 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1841 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1842 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1843 }
1844
1845 SwizzleRemap.clear();
1846 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1847 for (unsigned i = 0; i < 4; i++) {
1848 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1849 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1850 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1851 }
1852
1853 return BuildVector;
1854}
1855
1856
Tom Stellard75aadc22012-12-11 21:25:42 +00001857//===----------------------------------------------------------------------===//
1858// Custom DAG Optimizations
1859//===----------------------------------------------------------------------===//
1860
1861SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1862 DAGCombinerInfo &DCI) const {
1863 SelectionDAG &DAG = DCI.DAG;
1864
1865 switch (N->getOpcode()) {
Tom Stellard50122a52014-04-07 19:45:41 +00001866 default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00001867 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1868 case ISD::FP_ROUND: {
1869 SDValue Arg = N->getOperand(0);
1870 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001871 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001872 Arg.getOperand(0));
1873 }
1874 break;
1875 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001876
1877 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1878 // (i32 select_cc f32, f32, -1, 0 cc)
1879 //
1880 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1881 // this to one of the SET*_DX10 instructions.
1882 case ISD::FP_TO_SINT: {
1883 SDValue FNeg = N->getOperand(0);
1884 if (FNeg.getOpcode() != ISD::FNEG) {
1885 return SDValue();
1886 }
1887 SDValue SelectCC = FNeg.getOperand(0);
1888 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1889 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1890 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1891 !isHWTrueValue(SelectCC.getOperand(2)) ||
1892 !isHWFalseValue(SelectCC.getOperand(3))) {
1893 return SDValue();
1894 }
1895
Andrew Trickef9de2a2013-05-25 02:42:55 +00001896 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001897 SelectCC.getOperand(0), // LHS
1898 SelectCC.getOperand(1), // RHS
1899 DAG.getConstant(-1, MVT::i32), // True
1900 DAG.getConstant(0, MVT::i32), // Flase
1901 SelectCC.getOperand(4)); // CC
1902
1903 break;
1904 }
Quentin Colombete2e05482013-07-30 00:27:16 +00001905
NAKAMURA Takumi8a046432013-10-28 04:07:38 +00001906 // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1907 // => build_vector elt0, ... , NewEltIdx, ... , eltN
Quentin Colombete2e05482013-07-30 00:27:16 +00001908 case ISD::INSERT_VECTOR_ELT: {
1909 SDValue InVec = N->getOperand(0);
1910 SDValue InVal = N->getOperand(1);
1911 SDValue EltNo = N->getOperand(2);
1912 SDLoc dl(N);
1913
1914 // If the inserted element is an UNDEF, just use the input vector.
1915 if (InVal.getOpcode() == ISD::UNDEF)
1916 return InVec;
1917
1918 EVT VT = InVec.getValueType();
1919
1920 // If we can't generate a legal BUILD_VECTOR, exit
1921 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1922 return SDValue();
1923
1924 // Check that we know which element is being inserted
1925 if (!isa<ConstantSDNode>(EltNo))
1926 return SDValue();
1927 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1928
1929 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1930 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1931 // vector elements.
1932 SmallVector<SDValue, 8> Ops;
1933 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1934 Ops.append(InVec.getNode()->op_begin(),
1935 InVec.getNode()->op_end());
1936 } else if (InVec.getOpcode() == ISD::UNDEF) {
1937 unsigned NElts = VT.getVectorNumElements();
1938 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1939 } else {
1940 return SDValue();
1941 }
1942
1943 // Insert the element
1944 if (Elt < Ops.size()) {
1945 // All the operands of BUILD_VECTOR must have the same type;
1946 // we enforce that here.
1947 EVT OpVT = Ops[0].getValueType();
1948 if (InVal.getValueType() != OpVT)
1949 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1950 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
1951 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
1952 Ops[Elt] = InVal;
1953 }
1954
1955 // Return the new vector
Craig Topper48d114b2014-04-26 18:35:24 +00001956 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
Quentin Colombete2e05482013-07-30 00:27:16 +00001957 }
1958
Tom Stellard365366f2013-01-23 02:09:06 +00001959 // Extract_vec (Build_vector) generated by custom lowering
1960 // also needs to be customly combined
1961 case ISD::EXTRACT_VECTOR_ELT: {
1962 SDValue Arg = N->getOperand(0);
1963 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1964 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1965 unsigned Element = Const->getZExtValue();
1966 return Arg->getOperand(Element);
1967 }
1968 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001969 if (Arg.getOpcode() == ISD::BITCAST &&
1970 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1971 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1972 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001973 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00001974 Arg->getOperand(0).getOperand(Element));
1975 }
1976 }
Tom Stellard365366f2013-01-23 02:09:06 +00001977 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001978
1979 case ISD::SELECT_CC: {
Tom Stellardafa8b532014-05-09 16:42:16 +00001980 // Try common optimizations
1981 SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
1982 if (Ret.getNode())
1983 return Ret;
1984
Tom Stellarde06163a2013-02-07 14:02:35 +00001985 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1986 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001987 //
1988 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1989 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001990 SDValue LHS = N->getOperand(0);
1991 if (LHS.getOpcode() != ISD::SELECT_CC) {
1992 return SDValue();
1993 }
1994
1995 SDValue RHS = N->getOperand(1);
1996 SDValue True = N->getOperand(2);
1997 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001998 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001999
2000 if (LHS.getOperand(2).getNode() != True.getNode() ||
2001 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00002002 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00002003 return SDValue();
2004 }
2005
Tom Stellard5e524892013-03-08 15:37:11 +00002006 switch (NCC) {
2007 default: return SDValue();
2008 case ISD::SETNE: return LHS;
2009 case ISD::SETEQ: {
2010 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
2011 LHSCC = ISD::getSetCCInverse(LHSCC,
2012 LHS.getOperand(0).getValueType().isInteger());
Tom Stellardcd428182013-09-28 02:50:38 +00002013 if (DCI.isBeforeLegalizeOps() ||
2014 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
2015 return DAG.getSelectCC(SDLoc(N),
2016 LHS.getOperand(0),
2017 LHS.getOperand(1),
2018 LHS.getOperand(2),
2019 LHS.getOperand(3),
2020 LHSCC);
2021 break;
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002022 }
Tom Stellard5e524892013-03-08 15:37:11 +00002023 }
Tom Stellardcd428182013-09-28 02:50:38 +00002024 return SDValue();
Tom Stellard5e524892013-03-08 15:37:11 +00002025 }
Tom Stellardfbab8272013-08-16 01:12:11 +00002026
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002027 case AMDGPUISD::EXPORT: {
2028 SDValue Arg = N->getOperand(1);
2029 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2030 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002031
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002032 SDValue NewArgs[8] = {
2033 N->getOperand(0), // Chain
2034 SDValue(),
2035 N->getOperand(2), // ArrayBase
2036 N->getOperand(3), // Type
2037 N->getOperand(4), // SWZ_X
2038 N->getOperand(5), // SWZ_Y
2039 N->getOperand(6), // SWZ_Z
2040 N->getOperand(7) // SWZ_W
2041 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00002042 SDLoc DL(N);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002043 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
Craig Topper48d114b2014-04-26 18:35:24 +00002044 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
Tom Stellarde06163a2013-02-07 14:02:35 +00002045 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002046 case AMDGPUISD::TEXTURE_FETCH: {
2047 SDValue Arg = N->getOperand(1);
2048 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2049 break;
2050
2051 SDValue NewArgs[19] = {
2052 N->getOperand(0),
2053 N->getOperand(1),
2054 N->getOperand(2),
2055 N->getOperand(3),
2056 N->getOperand(4),
2057 N->getOperand(5),
2058 N->getOperand(6),
2059 N->getOperand(7),
2060 N->getOperand(8),
2061 N->getOperand(9),
2062 N->getOperand(10),
2063 N->getOperand(11),
2064 N->getOperand(12),
2065 N->getOperand(13),
2066 N->getOperand(14),
2067 N->getOperand(15),
2068 N->getOperand(16),
2069 N->getOperand(17),
2070 N->getOperand(18),
2071 };
2072 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
2073 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
Craig Topper48d114b2014-04-26 18:35:24 +00002074 NewArgs);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002075 }
Tom Stellard75aadc22012-12-11 21:25:42 +00002076 }
Matt Arsenault5565f65e2014-05-22 18:09:07 +00002077
2078 return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00002079}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002080
2081static bool
2082FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002083 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002084 const R600InstrInfo *TII =
2085 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
2086 if (!Src.isMachineOpcode())
2087 return false;
2088 switch (Src.getMachineOpcode()) {
2089 case AMDGPU::FNEG_R600:
2090 if (!Neg.getNode())
2091 return false;
2092 Src = Src.getOperand(0);
2093 Neg = DAG.getTargetConstant(1, MVT::i32);
2094 return true;
2095 case AMDGPU::FABS_R600:
2096 if (!Abs.getNode())
2097 return false;
2098 Src = Src.getOperand(0);
2099 Abs = DAG.getTargetConstant(1, MVT::i32);
2100 return true;
2101 case AMDGPU::CONST_COPY: {
2102 unsigned Opcode = ParentNode->getMachineOpcode();
2103 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2104
2105 if (!Sel.getNode())
2106 return false;
2107
2108 SDValue CstOffset = Src.getOperand(0);
2109 if (ParentNode->getValueType(0).isVector())
2110 return false;
2111
2112 // Gather constants values
2113 int SrcIndices[] = {
2114 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2115 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2116 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
2117 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2118 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2119 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2120 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2121 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2122 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2123 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2124 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
2125 };
2126 std::vector<unsigned> Consts;
Matt Arsenault4d64f962014-05-12 19:23:21 +00002127 for (int OtherSrcIdx : SrcIndices) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002128 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2129 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2130 continue;
2131 if (HasDst) {
2132 OtherSrcIdx--;
2133 OtherSelIdx--;
2134 }
2135 if (RegisterSDNode *Reg =
2136 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2137 if (Reg->getReg() == AMDGPU::ALU_CONST) {
Matt Arsenaultb3ee3882014-05-12 19:26:38 +00002138 ConstantSDNode *Cst
2139 = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002140 Consts.push_back(Cst->getZExtValue());
2141 }
2142 }
2143 }
2144
Matt Arsenault37c12d72014-05-12 20:42:57 +00002145 ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002146 Consts.push_back(Cst->getZExtValue());
2147 if (!TII->fitsConstReadLimitations(Consts)) {
2148 return false;
2149 }
2150
2151 Sel = CstOffset;
2152 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
2153 return true;
2154 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002155 case AMDGPU::MOV_IMM_I32:
2156 case AMDGPU::MOV_IMM_F32: {
2157 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
2158 uint64_t ImmValue = 0;
2159
2160
2161 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
2162 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
2163 float FloatValue = FPC->getValueAPF().convertToFloat();
2164 if (FloatValue == 0.0) {
2165 ImmReg = AMDGPU::ZERO;
2166 } else if (FloatValue == 0.5) {
2167 ImmReg = AMDGPU::HALF;
2168 } else if (FloatValue == 1.0) {
2169 ImmReg = AMDGPU::ONE;
2170 } else {
2171 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2172 }
2173 } else {
2174 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
2175 uint64_t Value = C->getZExtValue();
2176 if (Value == 0) {
2177 ImmReg = AMDGPU::ZERO;
2178 } else if (Value == 1) {
2179 ImmReg = AMDGPU::ONE_INT;
2180 } else {
2181 ImmValue = Value;
2182 }
2183 }
2184
2185 // Check that we aren't already using an immediate.
2186 // XXX: It's possible for an instruction to have more than one
2187 // immediate operand, but this is not supported yet.
2188 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
2189 if (!Imm.getNode())
2190 return false;
2191 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
2192 assert(C);
2193 if (C->getZExtValue())
2194 return false;
2195 Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
2196 }
2197 Src = DAG.getRegister(ImmReg, MVT::i32);
2198 return true;
2199 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002200 default:
2201 return false;
2202 }
2203}
2204
2205
2206/// \brief Fold the instructions after selecting them
2207SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2208 SelectionDAG &DAG) const {
2209 const R600InstrInfo *TII =
2210 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
2211 if (!Node->isMachineOpcode())
2212 return Node;
2213 unsigned Opcode = Node->getMachineOpcode();
2214 SDValue FakeOp;
2215
2216 std::vector<SDValue> Ops;
Craig Topper66e588b2014-06-29 00:40:57 +00002217 for (const SDUse &I : Node->ops())
2218 Ops.push_back(I);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002219
2220 if (Opcode == AMDGPU::DOT_4) {
2221 int OperandIdx[] = {
2222 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2223 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2224 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2225 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2226 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2227 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2228 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2229 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00002230 };
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002231 int NegIdx[] = {
2232 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
2233 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
2234 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
2235 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
2236 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
2237 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
2238 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
2239 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
2240 };
2241 int AbsIdx[] = {
2242 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
2243 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
2244 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
2245 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
2246 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
2247 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
2248 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
2249 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
2250 };
2251 for (unsigned i = 0; i < 8; i++) {
2252 if (OperandIdx[i] < 0)
2253 return Node;
2254 SDValue &Src = Ops[OperandIdx[i] - 1];
2255 SDValue &Neg = Ops[NegIdx[i] - 1];
2256 SDValue &Abs = Ops[AbsIdx[i] - 1];
2257 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2258 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2259 if (HasDst)
2260 SelIdx--;
2261 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002262 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2263 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2264 }
2265 } else if (Opcode == AMDGPU::REG_SEQUENCE) {
2266 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2267 SDValue &Src = Ops[i];
2268 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002269 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2270 }
Vincent Lejeune0167a312013-09-12 23:45:00 +00002271 } else if (Opcode == AMDGPU::CLAMP_R600) {
2272 SDValue Src = Node->getOperand(0);
2273 if (!Src.isMachineOpcode() ||
2274 !TII->hasInstrModifiers(Src.getMachineOpcode()))
2275 return Node;
2276 int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
2277 AMDGPU::OpName::clamp);
2278 if (ClampIdx < 0)
2279 return Node;
2280 std::vector<SDValue> Ops;
2281 unsigned NumOp = Src.getNumOperands();
2282 for(unsigned i = 0; i < NumOp; ++i)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00002283 Ops.push_back(Src.getOperand(i));
Vincent Lejeune0167a312013-09-12 23:45:00 +00002284 Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
2285 return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
2286 Node->getVTList(), Ops);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002287 } else {
2288 if (!TII->hasInstrModifiers(Opcode))
2289 return Node;
2290 int OperandIdx[] = {
2291 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2292 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2293 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
2294 };
2295 int NegIdx[] = {
2296 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
2297 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
2298 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
2299 };
2300 int AbsIdx[] = {
2301 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
2302 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
2303 -1
2304 };
2305 for (unsigned i = 0; i < 3; i++) {
2306 if (OperandIdx[i] < 0)
2307 return Node;
2308 SDValue &Src = Ops[OperandIdx[i] - 1];
2309 SDValue &Neg = Ops[NegIdx[i] - 1];
2310 SDValue FakeAbs;
2311 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2312 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2313 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002314 int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
2315 if (HasDst) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002316 SelIdx--;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002317 ImmIdx--;
2318 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002319 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002320 SDValue &Imm = Ops[ImmIdx];
2321 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002322 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2323 }
2324 }
2325
2326 return Node;
2327}