blob: 4c603f8e4bc5aee75dbc40c4c5d03d17cf3a20cb [file] [log] [blame]
Tom Stellard75aadc22012-12-11 21:25:42 +00001//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Custom DAG lowering for R600
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000016#include "AMDGPUFrameLowering.h"
Matt Arsenaultc791f392014-06-23 18:00:31 +000017#include "AMDGPUIntrinsicInfo.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000018#include "AMDGPUSubtarget.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000019#include "R600Defines.h"
20#include "R600InstrInfo.h"
21#include "R600MachineFunctionInfo.h"
Tom Stellard067c8152014-07-21 14:01:14 +000022#include "llvm/Analysis/ValueTracking.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000023#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000024#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000025#include "llvm/CodeGen/MachineInstrBuilder.h"
26#include "llvm/CodeGen/MachineRegisterInfo.h"
27#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000028#include "llvm/IR/Argument.h"
29#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000030
31using namespace llvm;
32
33R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000034 AMDGPUTargetLowering(TM),
35 Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000036 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
37 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
38 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
39 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000040 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
41 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
42
Tom Stellard75aadc22012-12-11 21:25:42 +000043 computeRegisterProperties();
44
Tom Stellard0351ea22013-09-28 02:50:50 +000045 // Set condition code actions
46 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
47 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000048 setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000049 setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000050 setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
51 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000052 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
53 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
54 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
55 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000056 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
57 setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
58
59 setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
60 setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
61 setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
62 setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
63
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000064 setOperationAction(ISD::FCOS, MVT::f32, Custom);
65 setOperationAction(ISD::FSIN, MVT::f32, Custom);
66
Tom Stellard75aadc22012-12-11 21:25:42 +000067 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000068 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000069
Tom Stellard492ebea2013-03-08 15:37:07 +000070 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
71 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Matt Arsenault1d555c42014-06-23 18:00:55 +000072 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000073
74 setOperationAction(ISD::FSUB, MVT::f32, Expand);
75
76 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
77 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
78 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000079
Tom Stellard75aadc22012-12-11 21:25:42 +000080 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
81 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
82
Tom Stellarde8f9f282013-03-08 15:37:05 +000083 setOperationAction(ISD::SETCC, MVT::i32, Expand);
84 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000085 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
Jan Vesely2cb62ce2014-07-10 22:40:21 +000086 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
87 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000088
Tom Stellard53f2f902013-09-05 18:38:03 +000089 setOperationAction(ISD::SELECT, MVT::i32, Expand);
90 setOperationAction(ISD::SELECT, MVT::f32, Expand);
91 setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
Tom Stellard53f2f902013-09-05 18:38:03 +000092 setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000093
Matt Arsenault4e466652014-04-16 01:41:30 +000094 // Expand sign extension of vectors
95 if (!Subtarget->hasBFE())
96 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
97
98 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
99 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);
100
101 if (!Subtarget->hasBFE())
102 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
103 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
104 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);
105
106 if (!Subtarget->hasBFE())
107 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
108 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
109 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);
110
111 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
112 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
113 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);
114
115 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
116
117
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000118 // Legalize loads and stores to the private address space.
119 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000120 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000121 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000122
123 // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
124 // spaces, so it is custom lowered to handle those where it isn't.
Tom Stellard1e803092013-07-23 01:48:18 +0000125 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
126 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
127 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
128 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000129 setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
130 setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
131
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000132 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000133 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000134 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000135 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
Tom Stellardd3ee8c12013-08-16 01:12:06 +0000136 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
137 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000138
Tom Stellard365366f2013-01-23 02:09:06 +0000139 setOperationAction(ISD::LOAD, MVT::i32, Custom);
140 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000141 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
142
Tom Stellard880a80a2014-06-17 16:53:14 +0000143 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
144 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
145 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
146 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
147
148 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
149 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
150 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
151 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
152
Tom Stellard75aadc22012-12-11 21:25:42 +0000153 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +0000154 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +0000155 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +0000156 setTargetDAGCombine(ISD::SELECT_CC);
Quentin Colombete2e05482013-07-30 00:27:16 +0000157 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000158
Matt Arsenaultb8b51532014-06-23 18:00:38 +0000159 setOperationAction(ISD::SUB, MVT::i64, Expand);
160
Tom Stellard5f337882014-04-29 23:12:43 +0000161 // These should be replaced by UDVIREM, but it does not happen automatically
162 // during Type Legalization
163 setOperationAction(ISD::UDIV, MVT::i64, Custom);
164 setOperationAction(ISD::UREM, MVT::i64, Custom);
Jan Vesely343cd6f02014-06-22 21:43:01 +0000165 setOperationAction(ISD::SDIV, MVT::i64, Custom);
166 setOperationAction(ISD::SREM, MVT::i64, Custom);
Tom Stellard5f337882014-04-29 23:12:43 +0000167
Jan Vesely25f36272014-06-18 12:27:13 +0000168 // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
169 // to be Legal/Custom in order to avoid library calls.
170 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
Jan Vesely900ff2e2014-06-18 12:27:15 +0000171 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
Jan Veselyecf51332014-06-18 12:27:17 +0000172 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
Jan Vesely25f36272014-06-18 12:27:13 +0000173
Michel Danzer49812b52013-07-10 16:37:07 +0000174 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
175
Matt Arsenaultc4d3d3a2014-06-23 18:00:49 +0000176 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
177 for (MVT VT : ScalarIntVTs) {
178 setOperationAction(ISD::ADDC, VT, Expand);
179 setOperationAction(ISD::SUBC, VT, Expand);
180 setOperationAction(ISD::ADDE, VT, Expand);
181 setOperationAction(ISD::SUBE, VT, Expand);
182 }
183
Tom Stellardb852af52013-03-08 15:37:03 +0000184 setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard87047f62013-04-24 23:56:18 +0000185 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
Tom Stellardfc455472013-08-12 22:33:21 +0000186 setSchedulingPreference(Sched::Source);
Tom Stellard75aadc22012-12-11 21:25:42 +0000187}
188
/// Expand R600 pseudo instructions that need custom MachineInstr-level
/// lowering after instruction selection: clamp/abs/neg source modifiers,
/// immediate moves, constant copies, cacheless RAT writes, texture
/// derivative sampling (TXD/TXD_SHADOW), branches, exports and RETURN.
/// Returns the (possibly unchanged) basic block containing the expansion.
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
          static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // Replace LDS_*_RET instruction that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI->getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // The destination is still live: keep the _RET form unchanged.
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()))
        return BB;

      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      // Copy every operand except operand 0 (the unused destination).
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  case AMDGPU::CLAMP_R600: {
    // Lower to a plain MOV carrying the clamp output modifier flag.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    // Lower to a MOV with the absolute-value source modifier flag.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    // Lower to a MOV with the negate source modifier flag.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    // Mark the instruction that defines the masked register with the
    // write-mask flag instead of emitting any new instruction.
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    // Materialize the float immediate via its raw bit pattern.
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    // MOV from the constant file; the constant-buffer slot is encoded in
    // the src0_sel immediate operand.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
        MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    // EOP is set when the write is immediately followed by RETURN, i.e. it
    // is the last instruction of the program.
    unsigned EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    // Texture sample with user-supplied derivatives: emit the H and V
    // gradient setup instructions, then the gradient sample that implicitly
    // uses both temporaries.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    // Adjust source swizzle and coordinate-type bits per texture target.
    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    // Same as TXD but samples with the shadow-compare variant
    // (TEX_SAMPLE_C_G) of the gradient sample instruction.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    // Adjust source swizzle and coordinate-type bits per texture target.
    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    // Unconditional branch lowers directly to JUMP.
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    // Float condition: set PREDICATE_BIT if the operand is non-zero, then
    // emit a predicated jump that kills the predicate bit.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    // Integer condition: same as above with the integer non-zero compare.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO_INT)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if its not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    // Scan forward for a later export with the same export type.
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    // EOP marks the export that immediately precedes RETURN.
    bool EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // CF instruction encoding differs between Evergreen (84) and R600 (40).
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  // The pseudo instruction has been expanded; remove it.
  MI->eraseFromParent();
  return BB;
}
565
566//===----------------------------------------------------------------------===//
567// Custom DAG Lowering Operations
568//===----------------------------------------------------------------------===//
569
Tom Stellard75aadc22012-12-11 21:25:42 +0000570SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000571 MachineFunction &MF = DAG.getMachineFunction();
572 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard75aadc22012-12-11 21:25:42 +0000573 switch (Op.getOpcode()) {
574 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Tom Stellard880a80a2014-06-17 16:53:14 +0000575 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
576 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
Jan Vesely25f36272014-06-18 12:27:13 +0000577 case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
Jan Veselyecf51332014-06-18 12:27:17 +0000578 case ISD::SRA_PARTS:
Jan Vesely900ff2e2014-06-18 12:27:15 +0000579 case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000580 case ISD::FCOS:
581 case ISD::FSIN: return LowerTrig(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000582 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000583 case ISD::STORE: return LowerSTORE(Op, DAG);
Matt Arsenaultd2c9e082014-07-07 18:34:45 +0000584 case ISD::LOAD: {
585 SDValue Result = LowerLOAD(Op, DAG);
586 assert((!Result.getNode() ||
587 Result.getNode()->getNumValues() == 2) &&
588 "Load should return a value and a chain");
589 return Result;
590 }
591
Matt Arsenault1d555c42014-06-23 18:00:55 +0000592 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000593 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000594 case ISD::INTRINSIC_VOID: {
595 SDValue Chain = Op.getOperand(0);
596 unsigned IntrinsicID =
597 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
598 switch (IntrinsicID) {
599 case AMDGPUIntrinsic::AMDGPU_store_output: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000600 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
601 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000602 MFI->LiveOuts.push_back(Reg);
Andrew Trickef9de2a2013-05-25 02:42:55 +0000603 return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
Tom Stellard75aadc22012-12-11 21:25:42 +0000604 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000605 case AMDGPUIntrinsic::R600_store_swizzle: {
606 const SDValue Args[8] = {
607 Chain,
608 Op.getOperand(2), // Export Value
609 Op.getOperand(3), // ArrayBase
610 Op.getOperand(4), // Type
611 DAG.getConstant(0, MVT::i32), // SWZ_X
612 DAG.getConstant(1, MVT::i32), // SWZ_Y
613 DAG.getConstant(2, MVT::i32), // SWZ_Z
614 DAG.getConstant(3, MVT::i32) // SWZ_W
615 };
Craig Topper48d114b2014-04-26 18:35:24 +0000616 return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(), Args);
Tom Stellard75aadc22012-12-11 21:25:42 +0000617 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000618
Tom Stellard75aadc22012-12-11 21:25:42 +0000619 // default for switch(IntrinsicID)
620 default: break;
621 }
622 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
623 break;
624 }
625 case ISD::INTRINSIC_WO_CHAIN: {
626 unsigned IntrinsicID =
627 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
628 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000629 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000630 switch(IntrinsicID) {
631 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000632 case AMDGPUIntrinsic::R600_load_input: {
633 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
634 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
635 MachineFunction &MF = DAG.getMachineFunction();
636 MachineRegisterInfo &MRI = MF.getRegInfo();
637 MRI.addLiveIn(Reg);
638 return DAG.getCopyFromReg(DAG.getEntryNode(),
639 SDLoc(DAG.getEntryNode()), Reg, VT);
640 }
641
642 case AMDGPUIntrinsic::R600_interp_input: {
643 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
644 int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
645 MachineSDNode *interp;
646 if (ijb < 0) {
647 const MachineFunction &MF = DAG.getMachineFunction();
648 const R600InstrInfo *TII =
649 static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
650 interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
651 MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
652 return DAG.getTargetExtractSubreg(
653 TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
654 DL, MVT::f32, SDValue(interp, 0));
655 }
656 MachineFunction &MF = DAG.getMachineFunction();
657 MachineRegisterInfo &MRI = MF.getRegInfo();
658 unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
659 unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
660 MRI.addLiveIn(RegisterI);
661 MRI.addLiveIn(RegisterJ);
662 SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
663 SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
664 SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
665 SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);
666
667 if (slot % 4 < 2)
668 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
669 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
670 RegisterJNode, RegisterINode);
671 else
672 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
673 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
674 RegisterJNode, RegisterINode);
675 return SDValue(interp, slot % 2);
676 }
Vincent Lejeunef143af32013-11-11 22:10:24 +0000677 case AMDGPUIntrinsic::R600_interp_xy:
678 case AMDGPUIntrinsic::R600_interp_zw: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000679 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Tom Stellard41afe6a2013-02-05 17:09:14 +0000680 MachineSDNode *interp;
Vincent Lejeunef143af32013-11-11 22:10:24 +0000681 SDValue RegisterINode = Op.getOperand(2);
682 SDValue RegisterJNode = Op.getOperand(3);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000683
Vincent Lejeunef143af32013-11-11 22:10:24 +0000684 if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
Tom Stellard41afe6a2013-02-05 17:09:14 +0000685 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
Vincent Lejeunef143af32013-11-11 22:10:24 +0000686 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000687 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000688 else
689 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
Vincent Lejeunef143af32013-11-11 22:10:24 +0000690 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000691 RegisterJNode, RegisterINode);
Vincent Lejeunef143af32013-11-11 22:10:24 +0000692 return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
693 SDValue(interp, 0), SDValue(interp, 1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000694 }
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000695 case AMDGPUIntrinsic::R600_tex:
696 case AMDGPUIntrinsic::R600_texc:
697 case AMDGPUIntrinsic::R600_txl:
698 case AMDGPUIntrinsic::R600_txlc:
699 case AMDGPUIntrinsic::R600_txb:
700 case AMDGPUIntrinsic::R600_txbc:
701 case AMDGPUIntrinsic::R600_txf:
702 case AMDGPUIntrinsic::R600_txq:
703 case AMDGPUIntrinsic::R600_ddx:
Vincent Lejeune6df39432013-10-02 16:00:33 +0000704 case AMDGPUIntrinsic::R600_ddy:
705 case AMDGPUIntrinsic::R600_ldptr: {
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000706 unsigned TextureOp;
707 switch (IntrinsicID) {
708 case AMDGPUIntrinsic::R600_tex:
709 TextureOp = 0;
710 break;
711 case AMDGPUIntrinsic::R600_texc:
712 TextureOp = 1;
713 break;
714 case AMDGPUIntrinsic::R600_txl:
715 TextureOp = 2;
716 break;
717 case AMDGPUIntrinsic::R600_txlc:
718 TextureOp = 3;
719 break;
720 case AMDGPUIntrinsic::R600_txb:
721 TextureOp = 4;
722 break;
723 case AMDGPUIntrinsic::R600_txbc:
724 TextureOp = 5;
725 break;
726 case AMDGPUIntrinsic::R600_txf:
727 TextureOp = 6;
728 break;
729 case AMDGPUIntrinsic::R600_txq:
730 TextureOp = 7;
731 break;
732 case AMDGPUIntrinsic::R600_ddx:
733 TextureOp = 8;
734 break;
735 case AMDGPUIntrinsic::R600_ddy:
736 TextureOp = 9;
737 break;
Vincent Lejeune6df39432013-10-02 16:00:33 +0000738 case AMDGPUIntrinsic::R600_ldptr:
739 TextureOp = 10;
740 break;
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000741 default:
742 llvm_unreachable("Unknow Texture Operation");
743 }
744
745 SDValue TexArgs[19] = {
746 DAG.getConstant(TextureOp, MVT::i32),
747 Op.getOperand(1),
748 DAG.getConstant(0, MVT::i32),
749 DAG.getConstant(1, MVT::i32),
750 DAG.getConstant(2, MVT::i32),
751 DAG.getConstant(3, MVT::i32),
752 Op.getOperand(2),
753 Op.getOperand(3),
754 Op.getOperand(4),
755 DAG.getConstant(0, MVT::i32),
756 DAG.getConstant(1, MVT::i32),
757 DAG.getConstant(2, MVT::i32),
758 DAG.getConstant(3, MVT::i32),
759 Op.getOperand(5),
760 Op.getOperand(6),
761 Op.getOperand(7),
762 Op.getOperand(8),
763 Op.getOperand(9),
764 Op.getOperand(10)
765 };
Craig Topper48d114b2014-04-26 18:35:24 +0000766 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000767 }
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000768 case AMDGPUIntrinsic::AMDGPU_dp4: {
769 SDValue Args[8] = {
770 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
771 DAG.getConstant(0, MVT::i32)),
772 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
773 DAG.getConstant(0, MVT::i32)),
774 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
775 DAG.getConstant(1, MVT::i32)),
776 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
777 DAG.getConstant(1, MVT::i32)),
778 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
779 DAG.getConstant(2, MVT::i32)),
780 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
781 DAG.getConstant(2, MVT::i32)),
782 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
783 DAG.getConstant(3, MVT::i32)),
784 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
785 DAG.getConstant(3, MVT::i32))
786 };
Craig Topper48d114b2014-04-26 18:35:24 +0000787 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000788 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000789
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000790 case Intrinsic::r600_read_ngroups_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000791 return LowerImplicitParameter(DAG, VT, DL, 0);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000792 case Intrinsic::r600_read_ngroups_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000793 return LowerImplicitParameter(DAG, VT, DL, 1);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000794 case Intrinsic::r600_read_ngroups_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000795 return LowerImplicitParameter(DAG, VT, DL, 2);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000796 case Intrinsic::r600_read_global_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000797 return LowerImplicitParameter(DAG, VT, DL, 3);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000798 case Intrinsic::r600_read_global_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000799 return LowerImplicitParameter(DAG, VT, DL, 4);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000800 case Intrinsic::r600_read_global_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000801 return LowerImplicitParameter(DAG, VT, DL, 5);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000802 case Intrinsic::r600_read_local_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000803 return LowerImplicitParameter(DAG, VT, DL, 6);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000804 case Intrinsic::r600_read_local_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000805 return LowerImplicitParameter(DAG, VT, DL, 7);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000806 case Intrinsic::r600_read_local_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000807 return LowerImplicitParameter(DAG, VT, DL, 8);
808
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000809 case Intrinsic::r600_read_tgid_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000810 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
811 AMDGPU::T1_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000812 case Intrinsic::r600_read_tgid_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000813 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
814 AMDGPU::T1_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000815 case Intrinsic::r600_read_tgid_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000816 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
817 AMDGPU::T1_Z, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000818 case Intrinsic::r600_read_tidig_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000819 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
820 AMDGPU::T0_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000821 case Intrinsic::r600_read_tidig_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000822 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
823 AMDGPU::T0_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000824 case Intrinsic::r600_read_tidig_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000825 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
826 AMDGPU::T0_Z, VT);
Matt Arsenault257d48d2014-06-24 22:13:39 +0000827 case Intrinsic::AMDGPU_rsq:
828 // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
829 return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000830 }
831 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
832 break;
833 }
834 } // end switch(Op.getOpcode())
835 return SDValue();
836}
837
838void R600TargetLowering::ReplaceNodeResults(SDNode *N,
839 SmallVectorImpl<SDValue> &Results,
840 SelectionDAG &DAG) const {
841 switch (N->getOpcode()) {
Matt Arsenaultd125d742014-03-27 17:23:24 +0000842 default:
843 AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
844 return;
Jan Vesely2cb62ce2014-07-10 22:40:21 +0000845 case ISD::FP_TO_UINT:
846 if (N->getValueType(0) == MVT::i1) {
847 Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
848 return;
849 }
850 // Fall-through. Since we don't care about out of bounds values
851 // we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint
852 // considers some extra cases which are not necessary here.
853 case ISD::FP_TO_SINT: {
854 SDValue Result;
855 if (expandFP_TO_SINT(N, Result, DAG))
856 Results.push_back(Result);
Tom Stellard365366f2013-01-23 02:09:06 +0000857 return;
Jan Vesely2cb62ce2014-07-10 22:40:21 +0000858 }
Jan Vesely343cd6f02014-06-22 21:43:01 +0000859 case ISD::UDIV: {
860 SDValue Op = SDValue(N, 0);
861 SDLoc DL(Op);
862 EVT VT = Op.getValueType();
863 SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT),
864 N->getOperand(0), N->getOperand(1));
865 Results.push_back(UDIVREM);
866 break;
867 }
868 case ISD::UREM: {
869 SDValue Op = SDValue(N, 0);
870 SDLoc DL(Op);
871 EVT VT = Op.getValueType();
872 SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT),
873 N->getOperand(0), N->getOperand(1));
874 Results.push_back(UDIVREM.getValue(1));
875 break;
876 }
877 case ISD::SDIV: {
878 SDValue Op = SDValue(N, 0);
879 SDLoc DL(Op);
880 EVT VT = Op.getValueType();
881 SDValue SDIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(VT, VT),
882 N->getOperand(0), N->getOperand(1));
883 Results.push_back(SDIVREM);
884 break;
885 }
886 case ISD::SREM: {
887 SDValue Op = SDValue(N, 0);
888 SDLoc DL(Op);
889 EVT VT = Op.getValueType();
890 SDValue SDIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(VT, VT),
891 N->getOperand(0), N->getOperand(1));
892 Results.push_back(SDIVREM.getValue(1));
893 break;
894 }
895 case ISD::SDIVREM: {
896 SDValue Op = SDValue(N, 1);
897 SDValue RES = LowerSDIVREM(Op, DAG);
898 Results.push_back(RES);
899 Results.push_back(RES.getValue(1));
900 break;
901 }
902 case ISD::UDIVREM: {
903 SDValue Op = SDValue(N, 0);
904 SDLoc DL(Op);
905 EVT VT = Op.getValueType();
906 EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
907
908 SDValue one = DAG.getConstant(1, HalfVT);
909 SDValue zero = DAG.getConstant(0, HalfVT);
910
911 //HiLo split
912 SDValue LHS = N->getOperand(0);
913 SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, zero);
914 SDValue LHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, one);
915
916 SDValue RHS = N->getOperand(1);
917 SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, zero);
918 SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, one);
919
920 // Get Speculative values
921 SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo);
922 SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo);
923
924 SDValue REM_Hi = zero;
925 SDValue REM_Lo = DAG.getSelectCC(DL, RHS_Hi, zero, REM_Part, LHS_Hi, ISD::SETEQ);
926
927 SDValue DIV_Hi = DAG.getSelectCC(DL, RHS_Hi, zero, DIV_Part, zero, ISD::SETEQ);
928 SDValue DIV_Lo = zero;
929
930 const unsigned halfBitWidth = HalfVT.getSizeInBits();
931
932 for (unsigned i = 0; i < halfBitWidth; ++i) {
933 SDValue POS = DAG.getConstant(halfBitWidth - i - 1, HalfVT);
934 // Get Value of high bit
935 SDValue HBit;
936 if (halfBitWidth == 32 && Subtarget->hasBFE()) {
937 HBit = DAG.getNode(AMDGPUISD::BFE_U32, DL, HalfVT, LHS_Lo, POS, one);
938 } else {
939 HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS);
940 HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one);
941 }
942
943 SDValue Carry = DAG.getNode(ISD::SRL, DL, HalfVT, REM_Lo,
944 DAG.getConstant(halfBitWidth - 1, HalfVT));
945 REM_Hi = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Hi, one);
946 REM_Hi = DAG.getNode(ISD::OR, DL, HalfVT, REM_Hi, Carry);
947
948 REM_Lo = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Lo, one);
949 REM_Lo = DAG.getNode(ISD::OR, DL, HalfVT, REM_Lo, HBit);
950
951
952 SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
953
954 SDValue BIT = DAG.getConstant(1 << (halfBitWidth - i - 1), HalfVT);
955 SDValue realBIT = DAG.getSelectCC(DL, REM, RHS, BIT, zero, ISD::SETGE);
956
957 DIV_Lo = DAG.getNode(ISD::OR, DL, HalfVT, DIV_Lo, realBIT);
958
959 // Update REM
960
961 SDValue REM_sub = DAG.getNode(ISD::SUB, DL, VT, REM, RHS);
962
963 REM = DAG.getSelectCC(DL, REM, RHS, REM_sub, REM, ISD::SETGE);
964 REM_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, zero);
965 REM_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, one);
966 }
967
968 SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
969 SDValue DIV = DAG.getNode(ISD::BUILD_PAIR, DL, VT, DIV_Lo, DIV_Hi);
970 Results.push_back(DIV);
971 Results.push_back(REM);
972 break;
973 }
974 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000975}
976
Tom Stellard880a80a2014-06-17 16:53:14 +0000977SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
978 SDValue Vector) const {
979
980 SDLoc DL(Vector);
981 EVT VecVT = Vector.getValueType();
982 EVT EltVT = VecVT.getVectorElementType();
983 SmallVector<SDValue, 8> Args;
984
985 for (unsigned i = 0, e = VecVT.getVectorNumElements();
986 i != e; ++i) {
987 Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
988 Vector, DAG.getConstant(i, getVectorIdxTy())));
989 }
990
991 return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
992}
993
994SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
995 SelectionDAG &DAG) const {
996
997 SDLoc DL(Op);
998 SDValue Vector = Op.getOperand(0);
999 SDValue Index = Op.getOperand(1);
1000
1001 if (isa<ConstantSDNode>(Index) ||
1002 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
1003 return Op;
1004
1005 Vector = vectorToVerticalVector(DAG, Vector);
1006 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
1007 Vector, Index);
1008}
1009
1010SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
1011 SelectionDAG &DAG) const {
1012 SDLoc DL(Op);
1013 SDValue Vector = Op.getOperand(0);
1014 SDValue Value = Op.getOperand(1);
1015 SDValue Index = Op.getOperand(2);
1016
1017 if (isa<ConstantSDNode>(Index) ||
1018 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
1019 return Op;
1020
1021 Vector = vectorToVerticalVector(DAG, Vector);
1022 SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
1023 Vector, Value, Index);
1024 return vectorToVerticalVector(DAG, Insert);
1025}
1026
Vincent Lejeuneb55940c2013-07-09 15:03:11 +00001027SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
1028 // On hw >= R700, COS/SIN input must be between -1. and 1.
1029 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
1030 EVT VT = Op.getValueType();
1031 SDValue Arg = Op.getOperand(0);
1032 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
1033 DAG.getNode(ISD::FADD, SDLoc(Op), VT,
1034 DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
1035 DAG.getConstantFP(0.15915494309, MVT::f32)),
1036 DAG.getConstantFP(0.5, MVT::f32)));
1037 unsigned TrigNode;
1038 switch (Op.getOpcode()) {
1039 case ISD::FCOS:
1040 TrigNode = AMDGPUISD::COS_HW;
1041 break;
1042 case ISD::FSIN:
1043 TrigNode = AMDGPUISD::SIN_HW;
1044 break;
1045 default:
1046 llvm_unreachable("Wrong trig opcode");
1047 }
1048 SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
1049 DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
1050 DAG.getConstantFP(-0.5, MVT::f32)));
1051 if (Gen >= AMDGPUSubtarget::R700)
1052 return TrigVal;
1053 // On R600 hw, COS/SIN input must be between -Pi and Pi.
1054 return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
1055 DAG.getConstantFP(3.14159265359, MVT::f32));
1056}
1057
Jan Vesely25f36272014-06-18 12:27:13 +00001058SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
1059 SDLoc DL(Op);
1060 EVT VT = Op.getValueType();
1061
1062 SDValue Lo = Op.getOperand(0);
1063 SDValue Hi = Op.getOperand(1);
1064 SDValue Shift = Op.getOperand(2);
1065 SDValue Zero = DAG.getConstant(0, VT);
1066 SDValue One = DAG.getConstant(1, VT);
1067
1068 SDValue Width = DAG.getConstant(VT.getSizeInBits(), VT);
1069 SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT);
1070 SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
1071 SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
1072
1073 // The dance around Width1 is necessary for 0 special case.
1074 // Without it the CompShift might be 32, producing incorrect results in
1075 // Overflow. So we do the shift in two steps, the alternative is to
1076 // add a conditional to filter the special case.
1077
1078 SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
1079 Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);
1080
1081 SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
1082 HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
1083 SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);
1084
1085 SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
1086 SDValue LoBig = Zero;
1087
1088 Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
1089 Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
1090
1091 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
1092}
1093
Jan Vesely900ff2e2014-06-18 12:27:15 +00001094SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
1095 SDLoc DL(Op);
1096 EVT VT = Op.getValueType();
1097
1098 SDValue Lo = Op.getOperand(0);
1099 SDValue Hi = Op.getOperand(1);
1100 SDValue Shift = Op.getOperand(2);
1101 SDValue Zero = DAG.getConstant(0, VT);
1102 SDValue One = DAG.getConstant(1, VT);
1103
Jan Veselyecf51332014-06-18 12:27:17 +00001104 const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;
1105
Jan Vesely900ff2e2014-06-18 12:27:15 +00001106 SDValue Width = DAG.getConstant(VT.getSizeInBits(), VT);
1107 SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT);
1108 SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
1109 SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
1110
1111 // The dance around Width1 is necessary for 0 special case.
1112 // Without it the CompShift might be 32, producing incorrect results in
1113 // Overflow. So we do the shift in two steps, the alternative is to
1114 // add a conditional to filter the special case.
1115
1116 SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
1117 Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);
1118
Jan Veselyecf51332014-06-18 12:27:17 +00001119 SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
Jan Vesely900ff2e2014-06-18 12:27:15 +00001120 SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
1121 LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);
1122
Jan Veselyecf51332014-06-18 12:27:17 +00001123 SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
1124 SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;
Jan Vesely900ff2e2014-06-18 12:27:15 +00001125
1126 Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
1127 Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
1128
1129 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
1130}
1131
Tom Stellard75aadc22012-12-11 21:25:42 +00001132SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
1133 return DAG.getNode(
1134 ISD::SETCC,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001135 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +00001136 MVT::i1,
1137 Op, DAG.getConstantFP(0.0f, MVT::f32),
1138 DAG.getCondCode(ISD::SETNE)
1139 );
1140}
1141
Tom Stellard75aadc22012-12-11 21:25:42 +00001142SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001143 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001144 unsigned DwordOffset) const {
1145 unsigned ByteOffset = DwordOffset * 4;
1146 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +00001147 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +00001148
1149 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
1150 assert(isInt<16>(ByteOffset));
1151
1152 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
1153 DAG.getConstant(ByteOffset, MVT::i32), // PTR
1154 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
1155 false, false, false, 0);
1156}
1157
Tom Stellard75aadc22012-12-11 21:25:42 +00001158bool R600TargetLowering::isZero(SDValue Op) const {
1159 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
1160 return Cst->isNullValue();
1161 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
1162 return CstFP->isZero();
1163 } else {
1164 return false;
1165 }
1166}
1167
/// Lower SELECT_CC into a form the R600 hardware can match: a SET*
/// instruction (hardware true/false result values), a CND* instruction
/// (compare against zero), or, failing both, a pair of supported SELECT_CCs.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
     ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    // True/False are reversed: invert the condition, or, if the inverse is
    // not legal, invert and swap the compare operands instead.
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    // Re-read the condition code here: CC may have been rewritten above.
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      // Not-equal forms are handled by inverting the condition and swapping
      // the select values to compensate.
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
1302
Alp Tokercb402912014-01-24 17:20:08 +00001303/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001304/// convert these pointers to a register index. Each register holds
1305/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
1306/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
1307/// for indirect addressing.
1308SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1309 unsigned StackWidth,
1310 SelectionDAG &DAG) const {
1311 unsigned SRLPad;
1312 switch(StackWidth) {
1313 case 1:
1314 SRLPad = 2;
1315 break;
1316 case 2:
1317 SRLPad = 3;
1318 break;
1319 case 4:
1320 SRLPad = 4;
1321 break;
1322 default: llvm_unreachable("Invalid stack width");
1323 }
1324
Andrew Trickef9de2a2013-05-25 02:42:55 +00001325 return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001326 DAG.getConstant(SRLPad, MVT::i32));
1327}
1328
1329void R600TargetLowering::getStackAddress(unsigned StackWidth,
1330 unsigned ElemIdx,
1331 unsigned &Channel,
1332 unsigned &PtrIncr) const {
1333 switch (StackWidth) {
1334 default:
1335 case 1:
1336 Channel = 0;
1337 if (ElemIdx > 0) {
1338 PtrIncr = 1;
1339 } else {
1340 PtrIncr = 0;
1341 }
1342 break;
1343 case 2:
1344 Channel = ElemIdx % 2;
1345 if (ElemIdx == 2) {
1346 PtrIncr = 1;
1347 } else {
1348 PtrIncr = 0;
1349 }
1350 break;
1351 case 4:
1352 Channel = ElemIdx;
1353 PtrIncr = 0;
1354 break;
1355 }
1356}
1357
/// Custom-lower STORE nodes. Global truncating stores become STORE_MSKOR
/// (read-modify-write mask-or), other global stores become dword-addressed
/// stores, and private-address-space stores are scalarized into
/// REGISTER_STORE nodes for indirect stack addressing.
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Ptr = Op.getOperand(2);

  // Give the generic AMDGPU lowering the first chance.
  SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
  if (Result.getNode()) {
    return Result;
  }

  if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
    if (StoreNode->isTruncatingStore()) {
      // Sub-dword global store: emit a masked-or store that merges the
      // shifted value into the containing dword.
      EVT VT = Value.getValueType();
      assert(VT.bitsLE(MVT::i32));
      EVT MemVT = StoreNode->getMemoryVT();
      SDValue MaskConstant;
      if (MemVT == MVT::i8) {
        MaskConstant = DAG.getConstant(0xFF, MVT::i32);
      } else {
        assert(MemVT == MVT::i16);
        MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
      }
      // Split the byte address into a dword address plus a byte offset
      // within the dword; the offset (in bits) positions value and mask.
      SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
                                      DAG.getConstant(2, MVT::i32));
      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
                                      DAG.getConstant(0x00000003, VT));
      SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
      SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
                                  DAG.getConstant(3, VT));
      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
      // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
      // vector instead.
      SDValue Src[4] = {
        ShiftedValue,
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(0, MVT::i32),
        Mask
      };
      SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src);
      SDValue Args[3] = { Chain, Input, DWordAddr };
      return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
                                     Op->getVTList(), Args, MemVT,
                                     StoreNode->getMemOperand());
    } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
               Value.getValueType().bitsGE(MVT::i32)) {
      // Convert pointer from byte address to dword address.
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
                        DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
                                    Ptr, DAG.getConstant(2, MVT::i32)));

      if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
        llvm_unreachable("Truncated and indexed stores not supported yet");
      } else {
        Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
      }
      return Chain;
    }
  }

  EVT ValueVT = Value.getValueType();

  // Everything from here on handles only the private address space.
  if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
  if (Ret.getNode()) {
    return Ret;
  }
  // Lowering for indirect addressing

  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  // Turn the byte-addressed pointer into a register index.
  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (ValueVT.isVector()) {
    // Scalarize: one REGISTER_STORE per element, with the register index
    // advanced cumulatively by the per-element PtrIncr delta.
    unsigned NumElemVT = ValueVT.getVectorNumElements();
    EVT ElemVT = ValueVT.getVectorElementType();
    SmallVector<SDValue, 4> Stores(NumElemVT);

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
                                 Value, DAG.getConstant(i, MVT::i32));

      Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
                              Chain, Elem, Ptr,
                              DAG.getTargetConstant(Channel, MVT::i32));
    }
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
  } else {
    if (ValueVT == MVT::i8) {
      Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
    }
    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
                        DAG.getTargetConstant(0, MVT::i32)); // Channel
  }

  return Chain;
}
1470
Tom Stellard365366f2013-01-23 02:09:06 +00001471// return (512 + (kc_bank << 12)
1472static int
1473ConstantAddressBlock(unsigned AddressSpace) {
1474 switch (AddressSpace) {
1475 case AMDGPUAS::CONSTANT_BUFFER_0:
1476 return 512;
1477 case AMDGPUAS::CONSTANT_BUFFER_1:
1478 return 512 + 4096;
1479 case AMDGPUAS::CONSTANT_BUFFER_2:
1480 return 512 + 4096 * 2;
1481 case AMDGPUAS::CONSTANT_BUFFER_3:
1482 return 512 + 4096 * 3;
1483 case AMDGPUAS::CONSTANT_BUFFER_4:
1484 return 512 + 4096 * 4;
1485 case AMDGPUAS::CONSTANT_BUFFER_5:
1486 return 512 + 4096 * 5;
1487 case AMDGPUAS::CONSTANT_BUFFER_6:
1488 return 512 + 4096 * 6;
1489 case AMDGPUAS::CONSTANT_BUFFER_7:
1490 return 512 + 4096 * 7;
1491 case AMDGPUAS::CONSTANT_BUFFER_8:
1492 return 512 + 4096 * 8;
1493 case AMDGPUAS::CONSTANT_BUFFER_9:
1494 return 512 + 4096 * 9;
1495 case AMDGPUAS::CONSTANT_BUFFER_10:
1496 return 512 + 4096 * 10;
1497 case AMDGPUAS::CONSTANT_BUFFER_11:
1498 return 512 + 4096 * 11;
1499 case AMDGPUAS::CONSTANT_BUFFER_12:
1500 return 512 + 4096 * 12;
1501 case AMDGPUAS::CONSTANT_BUFFER_13:
1502 return 512 + 4096 * 13;
1503 case AMDGPUAS::CONSTANT_BUFFER_14:
1504 return 512 + 4096 * 14;
1505 case AMDGPUAS::CONSTANT_BUFFER_15:
1506 return 512 + 4096 * 15;
1507 default:
1508 return -1;
1509 }
1510}
1511
/// Custom lowering for ISD::LOAD on R600.
///
/// Tries, in order:
///   1. The generic AMDGPU load lowering.
///   2. Constant-address loads of global variables -> REGISTER_LOAD with the
///      byte pointer converted to a dword register index.
///   3. Vector loads from local memory -> scalarized.
///   4. Constant-buffer (kcache) loads -> CONST_ADDRESS nodes.
///   5. Manual expansion of SEXT loads (shl + sra of an EXTLOAD).
///   6. Private-address loads -> indirect-addressing REGISTER_LOAD(s).
/// Anything else returns SDValue() so the legalizer handles it.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  // First give the generic AMDGPU lowering a chance.
  SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
  if (Ret.getNode()) {
    SDValue Ops[2] = {
      Ret,
      Chain
    };
    return DAG.getMergeValues(Ops, DL);
  }

  // Lower loads constant address space global variable loads
  if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
      isa<GlobalVariable>(
          GetUnderlyingObject(LoadNode->getMemOperand()->getValue()))) {

    // Convert the byte pointer into a dword register index (divide by 4).
    SDValue Ptr = DAG.getZExtOrTrunc(LoadNode->getBasePtr(), DL,
        getPointerTy(AMDGPUAS::PRIVATE_ADDRESS));
    Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
        DAG.getConstant(2, MVT::i32));
    return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op->getVTList(),
                       LoadNode->getChain(), Ptr,
                       DAG.getTargetConstant(0, MVT::i32), Op.getOperand(2));
  }

  // Vector loads from local memory are split into per-element loads.
  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      ScalarizeVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Constant-buffer loads. Only NON_EXTLOAD/ZEXTLOAD can be handled here;
  // SEXT loads are expanded further below.
  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
        isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT,
                           makeArrayRef(Slots, NumElements));
    } else {
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    // Scalar loads only need element 0 of the v4i32 result.
    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    // Expand as: (sra (shl (extload x), amt), amt) to sign-extend the
    // narrow memory value into the full register width.
    SDValue ShiftAmount =
          DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                  LoadNode->getPointerInfo(), MemVT,
                                  LoadNode->isVolatile(),
                                  LoadNode->isNonTemporal(),
                                  LoadNode->getAlignment());
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);

    SDValue MergedValues[2] = { Sra, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Everything below handles private (stack) loads only.
  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    // Emit one REGISTER_LOAD per element, then rebuild a 4-wide vector,
    // padding unused lanes with undef.
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, MVT::i32),
                             Op.getOperand(2));
    }
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2] = {
    LoweredLoad,
    Chain
  };

  return DAG.getMergeValues(Ops, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001673
Matt Arsenault1d555c42014-06-23 18:00:55 +00001674SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1675 SDValue Chain = Op.getOperand(0);
1676 SDValue Cond = Op.getOperand(1);
1677 SDValue Jump = Op.getOperand(2);
1678
1679 return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1680 Chain, Jump, Cond);
1681}
1682
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
///
/// For non-compute shaders each argument lives in a 128-bit register that is
/// simply marked live-in. For compute shaders arguments are materialized as
/// sign-extending loads from the implicit kernel-argument constant buffer
/// (CONSTANT_BUFFER_0), offset past the 36-byte implicit header.
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      SDLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned ShaderType = MF.getInfo<R600MachineFunctionInfo>()->getShaderType();

  SmallVector<ISD::InputArg, 8> LocalIns;

  // Recover the original (pre-legalization) argument types so we know the
  // in-memory type of each parameter.
  getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);

  AnalyzeFormalArguments(CCInfo, LocalIns);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    EVT VT = Ins[i].VT;        // Legalized register type.
    EVT MemVT = LocalIns[i].VT; // Original in-memory type.

    // Graphics shaders: arguments arrive pre-loaded in registers.
    if (ShaderType != ShaderType::COMPUTE) {
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
    // being invalid. Somehow this seems to work with i64 arguments, but breaks
    // for <1 x i64>.

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.

    // FIXME: This should really check the extload type, but the handling of
    // extload vecto parameters seems to be broken.
    //ISD::LoadExtType Ext = Ins[i].Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
    ISD::LoadExtType Ext = ISD::SEXTLOAD;
    SDValue Arg = DAG.getExtLoad(Ext, DL, VT, Chain,
                                 DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
                                 MachinePointerInfo(UndefValue::get(PtrTy)),
                                 MemVT, false, false, 4);

    // 4 is the preferred alignment for the CONSTANT memory space.
    InVals.push_back(Arg);
  }
  return Chain;
}
1742
Matt Arsenault758659232013-05-18 00:21:46 +00001743EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001744 if (!VT.isVector())
1745 return MVT::i32;
Tom Stellard75aadc22012-12-11 21:25:42 +00001746 return VT.changeVectorElementTypeToInteger();
1747}
1748
/// Fold "free" lanes of a BUILD_VECTOR that feeds a swizzled instruction:
/// undef lanes map to SEL_MASK_WRITE, constant 0.0/1.0 lanes map to the
/// hardware SEL_0/SEL_1 selects, and a lane equal to an earlier lane maps to
/// that earlier lane's index. Folded lanes are replaced by undef in the
/// returned BUILD_VECTOR; \p RemapSwizzle records the old-lane -> new-select
/// mapping for the caller to apply to the swizzle operands.
static SDValue CompactSwizzlableVector(
  SelectionDAG &DAG, SDValue VectorEntry,
  DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      // We mask write here to teach later passes that the ith element of this
      // vector is undef. Thus we can use it to reduce 128 bits reg usage,
      // break false dependencies and additionnaly make assembly easier to read.
      RemapSwizzle[i] = 7; // SEL_MASK_WRITE
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    // Lane is undef (originally, or made so by the constant fold above):
    // nothing left to deduplicate for it.
    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      continue;
    // A lane identical to an earlier lane is folded into that lane's slot.
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec);
}
1791
/// Try to move one EXTRACT_VECTOR_ELT lane of a BUILD_VECTOR into the slot
/// matching the element index it extracts, so the swizzle becomes trivial.
/// Lanes that already sit at their extracted index are pinned; at most one
/// swap is performed per call. \p RemapSwizzle receives the resulting
/// old-lane -> new-lane permutation (identity if nothing moved).
static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
                                DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };
  bool isUnmovable[4] = { false, false, false, false };
  // First pass: seed the identity mapping and pin lanes that already extract
  // their own index.
  // NOTE(review): the dyn_cast result is dereferenced without a null check —
  // this relies on EXTRACT_VECTOR_ELT indices always being ConstantSDNodes
  // here; cast<> would state that intent explicitly.
  for (unsigned i = 0; i < 4; i++) {
    RemapSwizzle[i] = i;
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
          ->getZExtValue();
      if (i == Idx)
        isUnmovable[Idx] = true;
    }
  }

  // Second pass: swap the first movable extract into its natural slot, then
  // stop (a single swap per invocation).
  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
          ->getZExtValue();
      if (isUnmovable[Idx])
        continue;
      // Swap i and Idx
      std::swap(NewBldVec[Idx], NewBldVec[i]);
      std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
      break;
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec);
}
1829
1830
1831SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1832SDValue Swz[4], SelectionDAG &DAG) const {
1833 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1834 // Old -> New swizzle values
1835 DenseMap<unsigned, unsigned> SwizzleRemap;
1836
1837 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1838 for (unsigned i = 0; i < 4; i++) {
1839 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1840 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1841 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1842 }
1843
1844 SwizzleRemap.clear();
1845 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1846 for (unsigned i = 0; i < 4; i++) {
1847 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1848 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1849 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1850 }
1851
1852 return BuildVector;
1853}
1854
1855
Tom Stellard75aadc22012-12-11 21:25:42 +00001856//===----------------------------------------------------------------------===//
1857// Custom DAG Optimizations
1858//===----------------------------------------------------------------------===//
1859
1860SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1861 DAGCombinerInfo &DCI) const {
1862 SelectionDAG &DAG = DCI.DAG;
1863
1864 switch (N->getOpcode()) {
Tom Stellard50122a52014-04-07 19:45:41 +00001865 default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00001866 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1867 case ISD::FP_ROUND: {
1868 SDValue Arg = N->getOperand(0);
1869 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001870 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001871 Arg.getOperand(0));
1872 }
1873 break;
1874 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001875
1876 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1877 // (i32 select_cc f32, f32, -1, 0 cc)
1878 //
1879 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1880 // this to one of the SET*_DX10 instructions.
1881 case ISD::FP_TO_SINT: {
1882 SDValue FNeg = N->getOperand(0);
1883 if (FNeg.getOpcode() != ISD::FNEG) {
1884 return SDValue();
1885 }
1886 SDValue SelectCC = FNeg.getOperand(0);
1887 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1888 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1889 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1890 !isHWTrueValue(SelectCC.getOperand(2)) ||
1891 !isHWFalseValue(SelectCC.getOperand(3))) {
1892 return SDValue();
1893 }
1894
Andrew Trickef9de2a2013-05-25 02:42:55 +00001895 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001896 SelectCC.getOperand(0), // LHS
1897 SelectCC.getOperand(1), // RHS
1898 DAG.getConstant(-1, MVT::i32), // True
1899 DAG.getConstant(0, MVT::i32), // Flase
1900 SelectCC.getOperand(4)); // CC
1901
1902 break;
1903 }
Quentin Colombete2e05482013-07-30 00:27:16 +00001904
NAKAMURA Takumi8a046432013-10-28 04:07:38 +00001905 // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1906 // => build_vector elt0, ... , NewEltIdx, ... , eltN
Quentin Colombete2e05482013-07-30 00:27:16 +00001907 case ISD::INSERT_VECTOR_ELT: {
1908 SDValue InVec = N->getOperand(0);
1909 SDValue InVal = N->getOperand(1);
1910 SDValue EltNo = N->getOperand(2);
1911 SDLoc dl(N);
1912
1913 // If the inserted element is an UNDEF, just use the input vector.
1914 if (InVal.getOpcode() == ISD::UNDEF)
1915 return InVec;
1916
1917 EVT VT = InVec.getValueType();
1918
1919 // If we can't generate a legal BUILD_VECTOR, exit
1920 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1921 return SDValue();
1922
1923 // Check that we know which element is being inserted
1924 if (!isa<ConstantSDNode>(EltNo))
1925 return SDValue();
1926 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1927
1928 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1929 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1930 // vector elements.
1931 SmallVector<SDValue, 8> Ops;
1932 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1933 Ops.append(InVec.getNode()->op_begin(),
1934 InVec.getNode()->op_end());
1935 } else if (InVec.getOpcode() == ISD::UNDEF) {
1936 unsigned NElts = VT.getVectorNumElements();
1937 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1938 } else {
1939 return SDValue();
1940 }
1941
1942 // Insert the element
1943 if (Elt < Ops.size()) {
1944 // All the operands of BUILD_VECTOR must have the same type;
1945 // we enforce that here.
1946 EVT OpVT = Ops[0].getValueType();
1947 if (InVal.getValueType() != OpVT)
1948 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1949 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
1950 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
1951 Ops[Elt] = InVal;
1952 }
1953
1954 // Return the new vector
Craig Topper48d114b2014-04-26 18:35:24 +00001955 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
Quentin Colombete2e05482013-07-30 00:27:16 +00001956 }
1957
Tom Stellard365366f2013-01-23 02:09:06 +00001958 // Extract_vec (Build_vector) generated by custom lowering
1959 // also needs to be customly combined
1960 case ISD::EXTRACT_VECTOR_ELT: {
1961 SDValue Arg = N->getOperand(0);
1962 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1963 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1964 unsigned Element = Const->getZExtValue();
1965 return Arg->getOperand(Element);
1966 }
1967 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001968 if (Arg.getOpcode() == ISD::BITCAST &&
1969 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1970 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1971 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001972 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00001973 Arg->getOperand(0).getOperand(Element));
1974 }
1975 }
Tom Stellard365366f2013-01-23 02:09:06 +00001976 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001977
1978 case ISD::SELECT_CC: {
Tom Stellardafa8b532014-05-09 16:42:16 +00001979 // Try common optimizations
1980 SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
1981 if (Ret.getNode())
1982 return Ret;
1983
Tom Stellarde06163a2013-02-07 14:02:35 +00001984 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1985 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001986 //
1987 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1988 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001989 SDValue LHS = N->getOperand(0);
1990 if (LHS.getOpcode() != ISD::SELECT_CC) {
1991 return SDValue();
1992 }
1993
1994 SDValue RHS = N->getOperand(1);
1995 SDValue True = N->getOperand(2);
1996 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001997 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001998
1999 if (LHS.getOperand(2).getNode() != True.getNode() ||
2000 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00002001 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00002002 return SDValue();
2003 }
2004
Tom Stellard5e524892013-03-08 15:37:11 +00002005 switch (NCC) {
2006 default: return SDValue();
2007 case ISD::SETNE: return LHS;
2008 case ISD::SETEQ: {
2009 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
2010 LHSCC = ISD::getSetCCInverse(LHSCC,
2011 LHS.getOperand(0).getValueType().isInteger());
Tom Stellardcd428182013-09-28 02:50:38 +00002012 if (DCI.isBeforeLegalizeOps() ||
2013 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
2014 return DAG.getSelectCC(SDLoc(N),
2015 LHS.getOperand(0),
2016 LHS.getOperand(1),
2017 LHS.getOperand(2),
2018 LHS.getOperand(3),
2019 LHSCC);
2020 break;
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002021 }
Tom Stellard5e524892013-03-08 15:37:11 +00002022 }
Tom Stellardcd428182013-09-28 02:50:38 +00002023 return SDValue();
Tom Stellard5e524892013-03-08 15:37:11 +00002024 }
Tom Stellardfbab8272013-08-16 01:12:11 +00002025
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002026 case AMDGPUISD::EXPORT: {
2027 SDValue Arg = N->getOperand(1);
2028 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2029 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002030
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002031 SDValue NewArgs[8] = {
2032 N->getOperand(0), // Chain
2033 SDValue(),
2034 N->getOperand(2), // ArrayBase
2035 N->getOperand(3), // Type
2036 N->getOperand(4), // SWZ_X
2037 N->getOperand(5), // SWZ_Y
2038 N->getOperand(6), // SWZ_Z
2039 N->getOperand(7) // SWZ_W
2040 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00002041 SDLoc DL(N);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002042 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
Craig Topper48d114b2014-04-26 18:35:24 +00002043 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
Tom Stellarde06163a2013-02-07 14:02:35 +00002044 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002045 case AMDGPUISD::TEXTURE_FETCH: {
2046 SDValue Arg = N->getOperand(1);
2047 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2048 break;
2049
2050 SDValue NewArgs[19] = {
2051 N->getOperand(0),
2052 N->getOperand(1),
2053 N->getOperand(2),
2054 N->getOperand(3),
2055 N->getOperand(4),
2056 N->getOperand(5),
2057 N->getOperand(6),
2058 N->getOperand(7),
2059 N->getOperand(8),
2060 N->getOperand(9),
2061 N->getOperand(10),
2062 N->getOperand(11),
2063 N->getOperand(12),
2064 N->getOperand(13),
2065 N->getOperand(14),
2066 N->getOperand(15),
2067 N->getOperand(16),
2068 N->getOperand(17),
2069 N->getOperand(18),
2070 };
2071 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
2072 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
Craig Topper48d114b2014-04-26 18:35:24 +00002073 NewArgs);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002074 }
Tom Stellard75aadc22012-12-11 21:25:42 +00002075 }
Matt Arsenault5565f65e2014-05-22 18:09:07 +00002076
2077 return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00002078}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002079
/// Try to fold the machine instruction producing \p Src (operand \p SrcIdx of
/// \p ParentNode) directly into the parent's operand encoding:
///   - FNEG_R600 / FABS_R600 fold into the neg/abs modifier bits,
///   - CONST_COPY folds into an ALU_CONST register plus a sel operand,
///   - MOV_IMM_I32 / MOV_IMM_F32 fold into an inline constant register or the
///     single ALU_LITERAL_X immediate slot.
/// On success \p Src is replaced and the matching by-reference operand
/// (\p Neg, \p Abs, \p Sel or \p Imm) is updated; returns false if the
/// corresponding operand slot does not exist or is already occupied.
static bool
FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
            SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
  if (!Src.isMachineOpcode())
    return false;
  switch (Src.getMachineOpcode()) {
  case AMDGPU::FNEG_R600:
    // Fold the negate into the parent's neg modifier (if it has one).
    if (!Neg.getNode())
      return false;
    Src = Src.getOperand(0);
    Neg = DAG.getTargetConstant(1, MVT::i32);
    return true;
  case AMDGPU::FABS_R600:
    // Fold the absolute value into the parent's abs modifier (if it has one).
    if (!Abs.getNode())
      return false;
    Src = Src.getOperand(0);
    Abs = DAG.getTargetConstant(1, MVT::i32);
    return true;
  case AMDGPU::CONST_COPY: {
    unsigned Opcode = ParentNode->getMachineOpcode();
    bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;

    if (!Sel.getNode())
      return false;

    SDValue CstOffset = Src.getOperand(0);
    if (ParentNode->getValueType(0).isVector())
      return false;

    // Gather constants values
    int SrcIndices[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
    };
    std::vector<unsigned> Consts;
    for (int OtherSrcIdx : SrcIndices) {
      int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
      if (OtherSrcIdx < 0 || OtherSelIdx < 0)
        continue;
      if (HasDst) {
        // SDNode operands don't include the dst operand, so shift the
        // MachineInstr-relative indices down by one.
        OtherSrcIdx--;
        OtherSelIdx--;
      }
      if (RegisterSDNode *Reg =
          dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
        if (Reg->getReg() == AMDGPU::ALU_CONST) {
          ConstantSDNode *Cst
            = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
          Consts.push_back(Cst->getZExtValue());
        }
      }
    }

    // Include the constant we want to fold, then check the hardware's
    // constant-read limits for a single instruction group.
    ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
    Consts.push_back(Cst->getZExtValue());
    if (!TII->fitsConstReadLimitations(Consts)) {
      return false;
    }

    Sel = CstOffset;
    Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
    return true;
  }
  case AMDGPU::MOV_IMM_I32:
  case AMDGPU::MOV_IMM_F32: {
    unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
    uint64_t ImmValue = 0;


    if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
      // NOTE(review): dyn_cast result is dereferenced without a null check;
      // this assumes MOV_IMM_F32's operand is always a ConstantFPSDNode —
      // cast<> would express that invariant.
      ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
      float FloatValue = FPC->getValueAPF().convertToFloat();
      // Prefer the hardware's inline constants over a literal slot.
      if (FloatValue == 0.0) {
        ImmReg = AMDGPU::ZERO;
      } else if (FloatValue == 0.5) {
        ImmReg = AMDGPU::HALF;
      } else if (FloatValue == 1.0) {
        ImmReg = AMDGPU::ONE;
      } else {
        ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
      }
    } else {
      // NOTE(review): same unchecked dyn_cast pattern as above.
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
      uint64_t Value = C->getZExtValue();
      if (Value == 0) {
        ImmReg = AMDGPU::ZERO;
      } else if (Value == 1) {
        ImmReg = AMDGPU::ONE_INT;
      } else {
        ImmValue = Value;
      }
    }

    // Check that we aren't already using an immediate.
    // XXX: It's possible for an instruction to have more than one
    // immediate operand, but this is not supported yet.
    if (ImmReg == AMDGPU::ALU_LITERAL_X) {
      if (!Imm.getNode())
        return false;
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
      assert(C);
      // A non-zero Imm operand means the literal slot is already taken.
      if (C->getZExtValue())
        return false;
      Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
    }
    Src = DAG.getRegister(ImmReg, MVT::i32);
    return true;
  }
  default:
    return false;
  }
}
2203
2204
2205/// \brief Fold the instructions after selecting them
2206SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2207 SelectionDAG &DAG) const {
2208 const R600InstrInfo *TII =
2209 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
2210 if (!Node->isMachineOpcode())
2211 return Node;
2212 unsigned Opcode = Node->getMachineOpcode();
2213 SDValue FakeOp;
2214
2215 std::vector<SDValue> Ops;
Craig Topper66e588b2014-06-29 00:40:57 +00002216 for (const SDUse &I : Node->ops())
2217 Ops.push_back(I);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002218
2219 if (Opcode == AMDGPU::DOT_4) {
2220 int OperandIdx[] = {
2221 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2222 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2223 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2224 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2225 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2226 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2227 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2228 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00002229 };
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002230 int NegIdx[] = {
2231 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
2232 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
2233 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
2234 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
2235 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
2236 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
2237 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
2238 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
2239 };
2240 int AbsIdx[] = {
2241 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
2242 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
2243 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
2244 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
2245 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
2246 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
2247 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
2248 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
2249 };
2250 for (unsigned i = 0; i < 8; i++) {
2251 if (OperandIdx[i] < 0)
2252 return Node;
2253 SDValue &Src = Ops[OperandIdx[i] - 1];
2254 SDValue &Neg = Ops[NegIdx[i] - 1];
2255 SDValue &Abs = Ops[AbsIdx[i] - 1];
2256 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2257 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2258 if (HasDst)
2259 SelIdx--;
2260 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002261 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2262 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2263 }
2264 } else if (Opcode == AMDGPU::REG_SEQUENCE) {
2265 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2266 SDValue &Src = Ops[i];
2267 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002268 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2269 }
Vincent Lejeune0167a312013-09-12 23:45:00 +00002270 } else if (Opcode == AMDGPU::CLAMP_R600) {
2271 SDValue Src = Node->getOperand(0);
2272 if (!Src.isMachineOpcode() ||
2273 !TII->hasInstrModifiers(Src.getMachineOpcode()))
2274 return Node;
2275 int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
2276 AMDGPU::OpName::clamp);
2277 if (ClampIdx < 0)
2278 return Node;
2279 std::vector<SDValue> Ops;
2280 unsigned NumOp = Src.getNumOperands();
2281 for(unsigned i = 0; i < NumOp; ++i)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00002282 Ops.push_back(Src.getOperand(i));
Vincent Lejeune0167a312013-09-12 23:45:00 +00002283 Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
2284 return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
2285 Node->getVTList(), Ops);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002286 } else {
2287 if (!TII->hasInstrModifiers(Opcode))
2288 return Node;
2289 int OperandIdx[] = {
2290 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2291 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2292 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
2293 };
2294 int NegIdx[] = {
2295 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
2296 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
2297 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
2298 };
2299 int AbsIdx[] = {
2300 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
2301 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
2302 -1
2303 };
2304 for (unsigned i = 0; i < 3; i++) {
2305 if (OperandIdx[i] < 0)
2306 return Node;
2307 SDValue &Src = Ops[OperandIdx[i] - 1];
2308 SDValue &Neg = Ops[NegIdx[i] - 1];
2309 SDValue FakeAbs;
2310 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2311 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2312 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002313 int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
2314 if (HasDst) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002315 SelIdx--;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002316 ImmIdx--;
2317 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002318 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002319 SDValue &Imm = Ops[ImmIdx];
2320 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002321 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2322 }
2323 }
2324
2325 return Node;
2326}