blob: c9e072a8327897c81e4a2a95c141425d00139583 [file] [log] [blame]
Tom Stellard75aadc22012-12-11 21:25:42 +00001//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Custom DAG lowering for R600
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000016#include "AMDGPUFrameLowering.h"
Matt Arsenaultc791f392014-06-23 18:00:31 +000017#include "AMDGPUIntrinsicInfo.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000018#include "AMDGPUSubtarget.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000019#include "R600Defines.h"
20#include "R600InstrInfo.h"
21#include "R600MachineFunctionInfo.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000022#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000023#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000024#include "llvm/CodeGen/MachineInstrBuilder.h"
25#include "llvm/CodeGen/MachineRegisterInfo.h"
26#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000027#include "llvm/IR/Argument.h"
28#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000029
30using namespace llvm;
31
32R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000033 AMDGPUTargetLowering(TM),
34 Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000035 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
36 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
37 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
38 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000039 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
40 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
41
Tom Stellard75aadc22012-12-11 21:25:42 +000042 computeRegisterProperties();
43
Tom Stellard0351ea22013-09-28 02:50:50 +000044 // Set condition code actions
45 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
46 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000047 setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000048 setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000049 setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
50 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000051 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
52 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
53 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
54 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000055 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
56 setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
57
58 setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
59 setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
60 setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
61 setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
62
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000063 setOperationAction(ISD::FCOS, MVT::f32, Custom);
64 setOperationAction(ISD::FSIN, MVT::f32, Custom);
65
Tom Stellard75aadc22012-12-11 21:25:42 +000066 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000067 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000068
Tom Stellard492ebea2013-03-08 15:37:07 +000069 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
70 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Matt Arsenault1d555c42014-06-23 18:00:55 +000071 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000072
73 setOperationAction(ISD::FSUB, MVT::f32, Expand);
74
75 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
76 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
77 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000078
Tom Stellard75aadc22012-12-11 21:25:42 +000079 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
80 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
81
Tom Stellarde8f9f282013-03-08 15:37:05 +000082 setOperationAction(ISD::SETCC, MVT::i32, Expand);
83 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000084 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
Jan Vesely2cb62ce2014-07-10 22:40:21 +000085 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
86 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000087
Tom Stellard53f2f902013-09-05 18:38:03 +000088 setOperationAction(ISD::SELECT, MVT::i32, Expand);
89 setOperationAction(ISD::SELECT, MVT::f32, Expand);
90 setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
Tom Stellard53f2f902013-09-05 18:38:03 +000091 setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000092
Matt Arsenault4e466652014-04-16 01:41:30 +000093 // Expand sign extension of vectors
94 if (!Subtarget->hasBFE())
95 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
96
97 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
98 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);
99
100 if (!Subtarget->hasBFE())
101 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
102 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
103 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);
104
105 if (!Subtarget->hasBFE())
106 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
107 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
108 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);
109
110 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
111 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
112 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);
113
114 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
115
116
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000117 // Legalize loads and stores to the private address space.
118 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000119 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000120 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000121
122 // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
123 // spaces, so it is custom lowered to handle those where it isn't.
Tom Stellard1e803092013-07-23 01:48:18 +0000124 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
125 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
126 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
127 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000128 setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
129 setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
130
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000131 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000132 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000133 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000134 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
Tom Stellardd3ee8c12013-08-16 01:12:06 +0000135 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
136 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000137
Tom Stellard365366f2013-01-23 02:09:06 +0000138 setOperationAction(ISD::LOAD, MVT::i32, Custom);
139 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000140 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
141
Tom Stellard880a80a2014-06-17 16:53:14 +0000142 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
143 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
144 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
145 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
146
147 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
148 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
149 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
150 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
151
Tom Stellard75aadc22012-12-11 21:25:42 +0000152 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +0000153 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +0000154 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +0000155 setTargetDAGCombine(ISD::SELECT_CC);
Quentin Colombete2e05482013-07-30 00:27:16 +0000156 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000157
Matt Arsenaultb8b51532014-06-23 18:00:38 +0000158 setOperationAction(ISD::SUB, MVT::i64, Expand);
159
Tom Stellard5f337882014-04-29 23:12:43 +0000160 // These should be replaced by UDVIREM, but it does not happen automatically
161 // during Type Legalization
162 setOperationAction(ISD::UDIV, MVT::i64, Custom);
163 setOperationAction(ISD::UREM, MVT::i64, Custom);
Jan Vesely343cd6f02014-06-22 21:43:01 +0000164 setOperationAction(ISD::SDIV, MVT::i64, Custom);
165 setOperationAction(ISD::SREM, MVT::i64, Custom);
Tom Stellard5f337882014-04-29 23:12:43 +0000166
Jan Vesely25f36272014-06-18 12:27:13 +0000167 // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
168 // to be Legal/Custom in order to avoid library calls.
169 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
Jan Vesely900ff2e2014-06-18 12:27:15 +0000170 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
Jan Veselyecf51332014-06-18 12:27:17 +0000171 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
Jan Vesely25f36272014-06-18 12:27:13 +0000172
Michel Danzer49812b52013-07-10 16:37:07 +0000173 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
174
Matt Arsenaultc4d3d3a2014-06-23 18:00:49 +0000175 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
176 for (MVT VT : ScalarIntVTs) {
177 setOperationAction(ISD::ADDC, VT, Expand);
178 setOperationAction(ISD::SUBC, VT, Expand);
179 setOperationAction(ISD::ADDE, VT, Expand);
180 setOperationAction(ISD::SUBE, VT, Expand);
181 }
182
Tom Stellardb852af52013-03-08 15:37:03 +0000183 setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard87047f62013-04-24 23:56:18 +0000184 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
Tom Stellardfc455472013-08-12 22:33:21 +0000185 setSchedulingPreference(Sched::Source);
Tom Stellard75aadc22012-12-11 21:25:42 +0000186}
187
/// Custom MachineInstr expansion hook.  Lowers R600 pseudo opcodes produced
/// by instruction selection into real machine instructions inside \p BB.
/// Opcodes not handled here are delegated to the AMDGPU base implementation.
/// In the common path the pseudo \p MI is erased at the bottom and the block
/// is returned; a few cases return early and keep \p MI in place.
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
          static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // Replace LDS_*_RET instruction that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI->getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // The result is still used -> keep the _RET form unmodified.
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()))
        return BB;

      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      // Copy every operand except the (dead) destination at index 0.
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  case AMDGPU::CLAMP_R600: {
    // Copy through a MOV carrying the CLAMP output modifier flag.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                   AMDGPU::MOV,
                                                   MI->getOperand(0).getReg(),
                                                   MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    // Copy through a MOV carrying the ABS source modifier flag.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                   AMDGPU::MOV,
                                                   MI->getOperand(0).getReg(),
                                                   MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    // Copy through a MOV carrying the NEG source modifier flag.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                   AMDGPU::MOV,
                                                   MI->getOperand(0).getReg(),
                                                   MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    // Set the MASK flag on the instruction that defines the masked register
    // (no new instruction is emitted for MASK_WRITE itself).
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    // Materialize the f32 immediate via its raw bit pattern.
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    // Read a constant: MOV from ALU_CONST with the constant slot encoded in
    // the src0_sel immediate operand.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
        MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    // EOP is set when the next instruction is the function's RETURN.
    unsigned EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    // Expand TXD into TEX_SET_GRADIENTS_H + TEX_SET_GRADIENTS_V followed by
    // TEX_SAMPLE_G; T0/T1 carry the gradients as implicit uses.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    // Adjust the source swizzle (Src*) and coordinate-type bits (CT*) for
    // the texture target encoded in TextureId.
    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    // Same expansion as TXD, but samples with the shadow-compare variant
    // TEX_SAMPLE_C_G.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    // Unconditional branch lowers straight to JUMP.
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    // Evaluate the f32 condition into PREDICATE_BIT with PRED_X (push flag
    // set), then branch on it with JUMP_COND.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    // Same as the f32 case but with the integer not-zero comparison.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
            AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO_INT)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
           .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if its not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    // Scan the rest of the block for a later export of the same type.
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // CF instruction encoding differs between Evergreen (84) and R600 (40).
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  // The pseudo has been replaced (or flagged); remove it.
  MI->eraseFromParent();
  return BB;
}
564
565//===----------------------------------------------------------------------===//
566// Custom DAG Lowering Operations
567//===----------------------------------------------------------------------===//
568
Tom Stellard75aadc22012-12-11 21:25:42 +0000569SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000570 MachineFunction &MF = DAG.getMachineFunction();
571 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard75aadc22012-12-11 21:25:42 +0000572 switch (Op.getOpcode()) {
573 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Tom Stellard880a80a2014-06-17 16:53:14 +0000574 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
575 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
Jan Vesely25f36272014-06-18 12:27:13 +0000576 case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
Jan Veselyecf51332014-06-18 12:27:17 +0000577 case ISD::SRA_PARTS:
Jan Vesely900ff2e2014-06-18 12:27:15 +0000578 case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000579 case ISD::FCOS:
580 case ISD::FSIN: return LowerTrig(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000581 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000582 case ISD::STORE: return LowerSTORE(Op, DAG);
Matt Arsenaultd2c9e082014-07-07 18:34:45 +0000583 case ISD::LOAD: {
584 SDValue Result = LowerLOAD(Op, DAG);
585 assert((!Result.getNode() ||
586 Result.getNode()->getNumValues() == 2) &&
587 "Load should return a value and a chain");
588 return Result;
589 }
590
Matt Arsenault1d555c42014-06-23 18:00:55 +0000591 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000592 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000593 case ISD::INTRINSIC_VOID: {
594 SDValue Chain = Op.getOperand(0);
595 unsigned IntrinsicID =
596 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
597 switch (IntrinsicID) {
598 case AMDGPUIntrinsic::AMDGPU_store_output: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000599 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
600 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000601 MFI->LiveOuts.push_back(Reg);
Andrew Trickef9de2a2013-05-25 02:42:55 +0000602 return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
Tom Stellard75aadc22012-12-11 21:25:42 +0000603 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000604 case AMDGPUIntrinsic::R600_store_swizzle: {
605 const SDValue Args[8] = {
606 Chain,
607 Op.getOperand(2), // Export Value
608 Op.getOperand(3), // ArrayBase
609 Op.getOperand(4), // Type
610 DAG.getConstant(0, MVT::i32), // SWZ_X
611 DAG.getConstant(1, MVT::i32), // SWZ_Y
612 DAG.getConstant(2, MVT::i32), // SWZ_Z
613 DAG.getConstant(3, MVT::i32) // SWZ_W
614 };
Craig Topper48d114b2014-04-26 18:35:24 +0000615 return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(), Args);
Tom Stellard75aadc22012-12-11 21:25:42 +0000616 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000617
Tom Stellard75aadc22012-12-11 21:25:42 +0000618 // default for switch(IntrinsicID)
619 default: break;
620 }
621 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
622 break;
623 }
624 case ISD::INTRINSIC_WO_CHAIN: {
625 unsigned IntrinsicID =
626 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
627 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000628 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000629 switch(IntrinsicID) {
630 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000631 case AMDGPUIntrinsic::R600_load_input: {
632 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
633 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
634 MachineFunction &MF = DAG.getMachineFunction();
635 MachineRegisterInfo &MRI = MF.getRegInfo();
636 MRI.addLiveIn(Reg);
637 return DAG.getCopyFromReg(DAG.getEntryNode(),
638 SDLoc(DAG.getEntryNode()), Reg, VT);
639 }
640
641 case AMDGPUIntrinsic::R600_interp_input: {
642 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
643 int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
644 MachineSDNode *interp;
645 if (ijb < 0) {
646 const MachineFunction &MF = DAG.getMachineFunction();
647 const R600InstrInfo *TII =
648 static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
649 interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
650 MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
651 return DAG.getTargetExtractSubreg(
652 TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
653 DL, MVT::f32, SDValue(interp, 0));
654 }
655 MachineFunction &MF = DAG.getMachineFunction();
656 MachineRegisterInfo &MRI = MF.getRegInfo();
657 unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
658 unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
659 MRI.addLiveIn(RegisterI);
660 MRI.addLiveIn(RegisterJ);
661 SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
662 SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
663 SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
664 SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);
665
666 if (slot % 4 < 2)
667 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
668 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
669 RegisterJNode, RegisterINode);
670 else
671 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
672 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
673 RegisterJNode, RegisterINode);
674 return SDValue(interp, slot % 2);
675 }
Vincent Lejeunef143af32013-11-11 22:10:24 +0000676 case AMDGPUIntrinsic::R600_interp_xy:
677 case AMDGPUIntrinsic::R600_interp_zw: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000678 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Tom Stellard41afe6a2013-02-05 17:09:14 +0000679 MachineSDNode *interp;
Vincent Lejeunef143af32013-11-11 22:10:24 +0000680 SDValue RegisterINode = Op.getOperand(2);
681 SDValue RegisterJNode = Op.getOperand(3);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000682
Vincent Lejeunef143af32013-11-11 22:10:24 +0000683 if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
Tom Stellard41afe6a2013-02-05 17:09:14 +0000684 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
Vincent Lejeunef143af32013-11-11 22:10:24 +0000685 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000686 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000687 else
688 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
Vincent Lejeunef143af32013-11-11 22:10:24 +0000689 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000690 RegisterJNode, RegisterINode);
Vincent Lejeunef143af32013-11-11 22:10:24 +0000691 return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
692 SDValue(interp, 0), SDValue(interp, 1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000693 }
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000694 case AMDGPUIntrinsic::R600_tex:
695 case AMDGPUIntrinsic::R600_texc:
696 case AMDGPUIntrinsic::R600_txl:
697 case AMDGPUIntrinsic::R600_txlc:
698 case AMDGPUIntrinsic::R600_txb:
699 case AMDGPUIntrinsic::R600_txbc:
700 case AMDGPUIntrinsic::R600_txf:
701 case AMDGPUIntrinsic::R600_txq:
702 case AMDGPUIntrinsic::R600_ddx:
Vincent Lejeune6df39432013-10-02 16:00:33 +0000703 case AMDGPUIntrinsic::R600_ddy:
704 case AMDGPUIntrinsic::R600_ldptr: {
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000705 unsigned TextureOp;
706 switch (IntrinsicID) {
707 case AMDGPUIntrinsic::R600_tex:
708 TextureOp = 0;
709 break;
710 case AMDGPUIntrinsic::R600_texc:
711 TextureOp = 1;
712 break;
713 case AMDGPUIntrinsic::R600_txl:
714 TextureOp = 2;
715 break;
716 case AMDGPUIntrinsic::R600_txlc:
717 TextureOp = 3;
718 break;
719 case AMDGPUIntrinsic::R600_txb:
720 TextureOp = 4;
721 break;
722 case AMDGPUIntrinsic::R600_txbc:
723 TextureOp = 5;
724 break;
725 case AMDGPUIntrinsic::R600_txf:
726 TextureOp = 6;
727 break;
728 case AMDGPUIntrinsic::R600_txq:
729 TextureOp = 7;
730 break;
731 case AMDGPUIntrinsic::R600_ddx:
732 TextureOp = 8;
733 break;
734 case AMDGPUIntrinsic::R600_ddy:
735 TextureOp = 9;
736 break;
Vincent Lejeune6df39432013-10-02 16:00:33 +0000737 case AMDGPUIntrinsic::R600_ldptr:
738 TextureOp = 10;
739 break;
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000740 default:
741 llvm_unreachable("Unknow Texture Operation");
742 }
743
744 SDValue TexArgs[19] = {
745 DAG.getConstant(TextureOp, MVT::i32),
746 Op.getOperand(1),
747 DAG.getConstant(0, MVT::i32),
748 DAG.getConstant(1, MVT::i32),
749 DAG.getConstant(2, MVT::i32),
750 DAG.getConstant(3, MVT::i32),
751 Op.getOperand(2),
752 Op.getOperand(3),
753 Op.getOperand(4),
754 DAG.getConstant(0, MVT::i32),
755 DAG.getConstant(1, MVT::i32),
756 DAG.getConstant(2, MVT::i32),
757 DAG.getConstant(3, MVT::i32),
758 Op.getOperand(5),
759 Op.getOperand(6),
760 Op.getOperand(7),
761 Op.getOperand(8),
762 Op.getOperand(9),
763 Op.getOperand(10)
764 };
Craig Topper48d114b2014-04-26 18:35:24 +0000765 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000766 }
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000767 case AMDGPUIntrinsic::AMDGPU_dp4: {
768 SDValue Args[8] = {
769 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
770 DAG.getConstant(0, MVT::i32)),
771 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
772 DAG.getConstant(0, MVT::i32)),
773 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
774 DAG.getConstant(1, MVT::i32)),
775 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
776 DAG.getConstant(1, MVT::i32)),
777 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
778 DAG.getConstant(2, MVT::i32)),
779 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
780 DAG.getConstant(2, MVT::i32)),
781 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
782 DAG.getConstant(3, MVT::i32)),
783 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
784 DAG.getConstant(3, MVT::i32))
785 };
Craig Topper48d114b2014-04-26 18:35:24 +0000786 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000787 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000788
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000789 case Intrinsic::r600_read_ngroups_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000790 return LowerImplicitParameter(DAG, VT, DL, 0);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000791 case Intrinsic::r600_read_ngroups_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000792 return LowerImplicitParameter(DAG, VT, DL, 1);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000793 case Intrinsic::r600_read_ngroups_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000794 return LowerImplicitParameter(DAG, VT, DL, 2);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000795 case Intrinsic::r600_read_global_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000796 return LowerImplicitParameter(DAG, VT, DL, 3);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000797 case Intrinsic::r600_read_global_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000798 return LowerImplicitParameter(DAG, VT, DL, 4);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000799 case Intrinsic::r600_read_global_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000800 return LowerImplicitParameter(DAG, VT, DL, 5);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000801 case Intrinsic::r600_read_local_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000802 return LowerImplicitParameter(DAG, VT, DL, 6);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000803 case Intrinsic::r600_read_local_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000804 return LowerImplicitParameter(DAG, VT, DL, 7);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000805 case Intrinsic::r600_read_local_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000806 return LowerImplicitParameter(DAG, VT, DL, 8);
807
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000808 case Intrinsic::r600_read_tgid_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000809 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
810 AMDGPU::T1_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000811 case Intrinsic::r600_read_tgid_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000812 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
813 AMDGPU::T1_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000814 case Intrinsic::r600_read_tgid_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000815 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
816 AMDGPU::T1_Z, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000817 case Intrinsic::r600_read_tidig_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000818 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
819 AMDGPU::T0_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000820 case Intrinsic::r600_read_tidig_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000821 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
822 AMDGPU::T0_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000823 case Intrinsic::r600_read_tidig_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000824 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
825 AMDGPU::T0_Z, VT);
Matt Arsenault257d48d2014-06-24 22:13:39 +0000826 case Intrinsic::AMDGPU_rsq:
827 // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
828 return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000829 }
830 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
831 break;
832 }
833 } // end switch(Op.getOpcode())
834 return SDValue();
835}
836
837void R600TargetLowering::ReplaceNodeResults(SDNode *N,
838 SmallVectorImpl<SDValue> &Results,
839 SelectionDAG &DAG) const {
840 switch (N->getOpcode()) {
Matt Arsenaultd125d742014-03-27 17:23:24 +0000841 default:
842 AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
843 return;
Jan Vesely2cb62ce2014-07-10 22:40:21 +0000844 case ISD::FP_TO_UINT:
845 if (N->getValueType(0) == MVT::i1) {
846 Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
847 return;
848 }
849 // Fall-through. Since we don't care about out of bounds values
850 // we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint
851 // considers some extra cases which are not necessary here.
852 case ISD::FP_TO_SINT: {
853 SDValue Result;
854 if (expandFP_TO_SINT(N, Result, DAG))
855 Results.push_back(Result);
Tom Stellard365366f2013-01-23 02:09:06 +0000856 return;
Jan Vesely2cb62ce2014-07-10 22:40:21 +0000857 }
Jan Vesely343cd6f02014-06-22 21:43:01 +0000858 case ISD::UDIV: {
859 SDValue Op = SDValue(N, 0);
860 SDLoc DL(Op);
861 EVT VT = Op.getValueType();
862 SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT),
863 N->getOperand(0), N->getOperand(1));
864 Results.push_back(UDIVREM);
865 break;
866 }
867 case ISD::UREM: {
868 SDValue Op = SDValue(N, 0);
869 SDLoc DL(Op);
870 EVT VT = Op.getValueType();
871 SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT),
872 N->getOperand(0), N->getOperand(1));
873 Results.push_back(UDIVREM.getValue(1));
874 break;
875 }
876 case ISD::SDIV: {
877 SDValue Op = SDValue(N, 0);
878 SDLoc DL(Op);
879 EVT VT = Op.getValueType();
880 SDValue SDIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(VT, VT),
881 N->getOperand(0), N->getOperand(1));
882 Results.push_back(SDIVREM);
883 break;
884 }
885 case ISD::SREM: {
886 SDValue Op = SDValue(N, 0);
887 SDLoc DL(Op);
888 EVT VT = Op.getValueType();
889 SDValue SDIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(VT, VT),
890 N->getOperand(0), N->getOperand(1));
891 Results.push_back(SDIVREM.getValue(1));
892 break;
893 }
894 case ISD::SDIVREM: {
895 SDValue Op = SDValue(N, 1);
896 SDValue RES = LowerSDIVREM(Op, DAG);
897 Results.push_back(RES);
898 Results.push_back(RES.getValue(1));
899 break;
900 }
901 case ISD::UDIVREM: {
902 SDValue Op = SDValue(N, 0);
903 SDLoc DL(Op);
904 EVT VT = Op.getValueType();
905 EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
906
907 SDValue one = DAG.getConstant(1, HalfVT);
908 SDValue zero = DAG.getConstant(0, HalfVT);
909
910 //HiLo split
911 SDValue LHS = N->getOperand(0);
912 SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, zero);
913 SDValue LHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, one);
914
915 SDValue RHS = N->getOperand(1);
916 SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, zero);
917 SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, one);
918
919 // Get Speculative values
920 SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo);
921 SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo);
922
923 SDValue REM_Hi = zero;
924 SDValue REM_Lo = DAG.getSelectCC(DL, RHS_Hi, zero, REM_Part, LHS_Hi, ISD::SETEQ);
925
926 SDValue DIV_Hi = DAG.getSelectCC(DL, RHS_Hi, zero, DIV_Part, zero, ISD::SETEQ);
927 SDValue DIV_Lo = zero;
928
929 const unsigned halfBitWidth = HalfVT.getSizeInBits();
930
931 for (unsigned i = 0; i < halfBitWidth; ++i) {
932 SDValue POS = DAG.getConstant(halfBitWidth - i - 1, HalfVT);
933 // Get Value of high bit
934 SDValue HBit;
935 if (halfBitWidth == 32 && Subtarget->hasBFE()) {
936 HBit = DAG.getNode(AMDGPUISD::BFE_U32, DL, HalfVT, LHS_Lo, POS, one);
937 } else {
938 HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS);
939 HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one);
940 }
941
942 SDValue Carry = DAG.getNode(ISD::SRL, DL, HalfVT, REM_Lo,
943 DAG.getConstant(halfBitWidth - 1, HalfVT));
944 REM_Hi = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Hi, one);
945 REM_Hi = DAG.getNode(ISD::OR, DL, HalfVT, REM_Hi, Carry);
946
947 REM_Lo = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Lo, one);
948 REM_Lo = DAG.getNode(ISD::OR, DL, HalfVT, REM_Lo, HBit);
949
950
951 SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
952
953 SDValue BIT = DAG.getConstant(1 << (halfBitWidth - i - 1), HalfVT);
954 SDValue realBIT = DAG.getSelectCC(DL, REM, RHS, BIT, zero, ISD::SETGE);
955
956 DIV_Lo = DAG.getNode(ISD::OR, DL, HalfVT, DIV_Lo, realBIT);
957
958 // Update REM
959
960 SDValue REM_sub = DAG.getNode(ISD::SUB, DL, VT, REM, RHS);
961
962 REM = DAG.getSelectCC(DL, REM, RHS, REM_sub, REM, ISD::SETGE);
963 REM_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, zero);
964 REM_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, one);
965 }
966
967 SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
968 SDValue DIV = DAG.getNode(ISD::BUILD_PAIR, DL, VT, DIV_Lo, DIV_Hi);
969 Results.push_back(DIV);
970 Results.push_back(REM);
971 break;
972 }
973 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000974}
975
Tom Stellard880a80a2014-06-17 16:53:14 +0000976SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
977 SDValue Vector) const {
978
979 SDLoc DL(Vector);
980 EVT VecVT = Vector.getValueType();
981 EVT EltVT = VecVT.getVectorElementType();
982 SmallVector<SDValue, 8> Args;
983
984 for (unsigned i = 0, e = VecVT.getVectorNumElements();
985 i != e; ++i) {
986 Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
987 Vector, DAG.getConstant(i, getVectorIdxTy())));
988 }
989
990 return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
991}
992
993SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
994 SelectionDAG &DAG) const {
995
996 SDLoc DL(Op);
997 SDValue Vector = Op.getOperand(0);
998 SDValue Index = Op.getOperand(1);
999
1000 if (isa<ConstantSDNode>(Index) ||
1001 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
1002 return Op;
1003
1004 Vector = vectorToVerticalVector(DAG, Vector);
1005 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
1006 Vector, Index);
1007}
1008
1009SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
1010 SelectionDAG &DAG) const {
1011 SDLoc DL(Op);
1012 SDValue Vector = Op.getOperand(0);
1013 SDValue Value = Op.getOperand(1);
1014 SDValue Index = Op.getOperand(2);
1015
1016 if (isa<ConstantSDNode>(Index) ||
1017 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
1018 return Op;
1019
1020 Vector = vectorToVerticalVector(DAG, Vector);
1021 SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
1022 Vector, Value, Index);
1023 return vectorToVerticalVector(DAG, Insert);
1024}
1025
Vincent Lejeuneb55940c2013-07-09 15:03:11 +00001026SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
1027 // On hw >= R700, COS/SIN input must be between -1. and 1.
1028 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
1029 EVT VT = Op.getValueType();
1030 SDValue Arg = Op.getOperand(0);
1031 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
1032 DAG.getNode(ISD::FADD, SDLoc(Op), VT,
1033 DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
1034 DAG.getConstantFP(0.15915494309, MVT::f32)),
1035 DAG.getConstantFP(0.5, MVT::f32)));
1036 unsigned TrigNode;
1037 switch (Op.getOpcode()) {
1038 case ISD::FCOS:
1039 TrigNode = AMDGPUISD::COS_HW;
1040 break;
1041 case ISD::FSIN:
1042 TrigNode = AMDGPUISD::SIN_HW;
1043 break;
1044 default:
1045 llvm_unreachable("Wrong trig opcode");
1046 }
1047 SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
1048 DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
1049 DAG.getConstantFP(-0.5, MVT::f32)));
1050 if (Gen >= AMDGPUSubtarget::R700)
1051 return TrigVal;
1052 // On R600 hw, COS/SIN input must be between -Pi and Pi.
1053 return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
1054 DAG.getConstantFP(3.14159265359, MVT::f32));
1055}
1056
Jan Vesely25f36272014-06-18 12:27:13 +00001057SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
1058 SDLoc DL(Op);
1059 EVT VT = Op.getValueType();
1060
1061 SDValue Lo = Op.getOperand(0);
1062 SDValue Hi = Op.getOperand(1);
1063 SDValue Shift = Op.getOperand(2);
1064 SDValue Zero = DAG.getConstant(0, VT);
1065 SDValue One = DAG.getConstant(1, VT);
1066
1067 SDValue Width = DAG.getConstant(VT.getSizeInBits(), VT);
1068 SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT);
1069 SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
1070 SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
1071
1072 // The dance around Width1 is necessary for 0 special case.
1073 // Without it the CompShift might be 32, producing incorrect results in
1074 // Overflow. So we do the shift in two steps, the alternative is to
1075 // add a conditional to filter the special case.
1076
1077 SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
1078 Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);
1079
1080 SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
1081 HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
1082 SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);
1083
1084 SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
1085 SDValue LoBig = Zero;
1086
1087 Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
1088 Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
1089
1090 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
1091}
1092
Jan Vesely900ff2e2014-06-18 12:27:15 +00001093SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
1094 SDLoc DL(Op);
1095 EVT VT = Op.getValueType();
1096
1097 SDValue Lo = Op.getOperand(0);
1098 SDValue Hi = Op.getOperand(1);
1099 SDValue Shift = Op.getOperand(2);
1100 SDValue Zero = DAG.getConstant(0, VT);
1101 SDValue One = DAG.getConstant(1, VT);
1102
Jan Veselyecf51332014-06-18 12:27:17 +00001103 const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;
1104
Jan Vesely900ff2e2014-06-18 12:27:15 +00001105 SDValue Width = DAG.getConstant(VT.getSizeInBits(), VT);
1106 SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT);
1107 SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
1108 SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
1109
1110 // The dance around Width1 is necessary for 0 special case.
1111 // Without it the CompShift might be 32, producing incorrect results in
1112 // Overflow. So we do the shift in two steps, the alternative is to
1113 // add a conditional to filter the special case.
1114
1115 SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
1116 Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);
1117
Jan Veselyecf51332014-06-18 12:27:17 +00001118 SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
Jan Vesely900ff2e2014-06-18 12:27:15 +00001119 SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
1120 LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);
1121
Jan Veselyecf51332014-06-18 12:27:17 +00001122 SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
1123 SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;
Jan Vesely900ff2e2014-06-18 12:27:15 +00001124
1125 Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
1126 Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
1127
1128 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
1129}
1130
Tom Stellard75aadc22012-12-11 21:25:42 +00001131SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
1132 return DAG.getNode(
1133 ISD::SETCC,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001134 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +00001135 MVT::i1,
1136 Op, DAG.getConstantFP(0.0f, MVT::f32),
1137 DAG.getCondCode(ISD::SETNE)
1138 );
1139}
1140
Tom Stellard75aadc22012-12-11 21:25:42 +00001141SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001142 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001143 unsigned DwordOffset) const {
1144 unsigned ByteOffset = DwordOffset * 4;
1145 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +00001146 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +00001147
1148 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
1149 assert(isInt<16>(ByteOffset));
1150
1151 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
1152 DAG.getConstant(ByteOffset, MVT::i32), // PTR
1153 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
1154 false, false, false, 0);
1155}
1156
Tom Stellard75aadc22012-12-11 21:25:42 +00001157bool R600TargetLowering::isZero(SDValue Op) const {
1158 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
1159 return Cst->isNullValue();
1160 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
1161 return CstFP->isZero();
1162 } else {
1163 return false;
1164 }
1165}
1166
/// Lower SELECT_CC to a form the R600 SET* / CND* instructions can match,
/// massaging operands (swapping, inverting the condition, moving zero to
/// the RHS) so one of the native patterns applies; otherwise split it into
/// two supported SELECT_CC operations.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  // If True/False are reversed (HW-false in the True slot), invert the
  // condition — or, failing legality, invert and swap the compare operands.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
     ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    // Note: this CCOpcode intentionally shadows the outer one — the
    // CC operand may have been rewritten by the SET* handling above.
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        // Inverting the condition also exchanges the select arms.
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    // CND* has no not-equal form: fold the negation into the select
    // by inverting the condition and exchanging the arms.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
1301
Alp Tokercb402912014-01-24 17:20:08 +00001302/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001303/// convert these pointers to a register index. Each register holds
1304/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
1305/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
1306/// for indirect addressing.
1307SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1308 unsigned StackWidth,
1309 SelectionDAG &DAG) const {
1310 unsigned SRLPad;
1311 switch(StackWidth) {
1312 case 1:
1313 SRLPad = 2;
1314 break;
1315 case 2:
1316 SRLPad = 3;
1317 break;
1318 case 4:
1319 SRLPad = 4;
1320 break;
1321 default: llvm_unreachable("Invalid stack width");
1322 }
1323
Andrew Trickef9de2a2013-05-25 02:42:55 +00001324 return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001325 DAG.getConstant(SRLPad, MVT::i32));
1326}
1327
1328void R600TargetLowering::getStackAddress(unsigned StackWidth,
1329 unsigned ElemIdx,
1330 unsigned &Channel,
1331 unsigned &PtrIncr) const {
1332 switch (StackWidth) {
1333 default:
1334 case 1:
1335 Channel = 0;
1336 if (ElemIdx > 0) {
1337 PtrIncr = 1;
1338 } else {
1339 PtrIncr = 0;
1340 }
1341 break;
1342 case 2:
1343 Channel = ElemIdx % 2;
1344 if (ElemIdx == 2) {
1345 PtrIncr = 1;
1346 } else {
1347 PtrIncr = 0;
1348 }
1349 break;
1350 case 4:
1351 Channel = ElemIdx;
1352 PtrIncr = 0;
1353 break;
1354 }
1355}
1356
/// Custom store lowering. Global-address truncating stores of i8/i16 become
/// STORE_MSKOR read-modify-write dword operations; other global stores get
/// their pointer converted to a dword address. Private-address stores are
/// lowered to REGISTER_STORE nodes for indirect (stack) addressing.
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Ptr = Op.getOperand(2);

  // Give the generic AMDGPU lowering the first chance.
  SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
  if (Result.getNode()) {
    return Result;
  }

  if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
    if (StoreNode->isTruncatingStore()) {
      // Sub-dword global store: emit a masked-OR (read-modify-write) of the
      // containing dword, since the hardware stores whole dwords.
      EVT VT = Value.getValueType();
      assert(VT.bitsLE(MVT::i32));
      EVT MemVT = StoreNode->getMemoryVT();
      SDValue MaskConstant;
      if (MemVT == MVT::i8) {
        MaskConstant = DAG.getConstant(0xFF, MVT::i32);
      } else {
        assert(MemVT == MVT::i16);
        MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
      }
      // Split the byte pointer into dword address and byte offset within it.
      SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
                                      DAG.getConstant(2, MVT::i32));
      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
                                      DAG.getConstant(0x00000003, VT));
      // Shift value and mask into position within the dword (byte offset * 8).
      SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
      SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
                                  DAG.getConstant(3, VT));
      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
      // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
      // vector instead.
      SDValue Src[4] = {
        ShiftedValue,
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(0, MVT::i32),
        Mask
      };
      SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src);
      SDValue Args[3] = { Chain, Input, DWordAddr };
      return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
                                     Op->getVTList(), Args, MemVT,
                                     StoreNode->getMemOperand());
    } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
               Value.getValueType().bitsGE(MVT::i32)) {
      // Convert pointer from byte address to dword address.
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
                        DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
                                    Ptr, DAG.getConstant(2, MVT::i32)));

      if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
        llvm_unreachable("Truncated and indexed stores not supported yet");
      } else {
        Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
      }
      return Chain;
    }
  }

  EVT ValueVT = Value.getValueType();

  // Everything below handles private (stack) stores only.
  if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // NOTE(review): second identical call to the base lowering — presumably
  // redundant with the one above unless state changed; confirm before removing.
  SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
  if (Ret.getNode()) {
    return Ret;
  }
  // Lowering for indirect addressing

  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (ValueVT.isVector()) {
    // Store the vector one element at a time; getStackAddress maps each
    // element to a (channel, pointer increment) pair.
    unsigned NumElemVT = ValueVT.getVectorNumElements();
    EVT ElemVT = ValueVT.getVectorElementType();
    SmallVector<SDValue, 4> Stores(NumElemVT);

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
                                 Value, DAG.getConstant(i, MVT::i32));

      Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
                              Chain, Elem, Ptr,
                              DAG.getTargetConstant(Channel, MVT::i32));
    }
    // Tie all element stores together into a single chain.
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
  } else {
    if (ValueVT == MVT::i8) {
      Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
    }
    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
        DAG.getTargetConstant(0, MVT::i32)); // Channel
  }

  return Chain;
}
1469
Tom Stellard365366f2013-01-23 02:09:06 +00001470// return (512 + (kc_bank << 12)
1471static int
1472ConstantAddressBlock(unsigned AddressSpace) {
1473 switch (AddressSpace) {
1474 case AMDGPUAS::CONSTANT_BUFFER_0:
1475 return 512;
1476 case AMDGPUAS::CONSTANT_BUFFER_1:
1477 return 512 + 4096;
1478 case AMDGPUAS::CONSTANT_BUFFER_2:
1479 return 512 + 4096 * 2;
1480 case AMDGPUAS::CONSTANT_BUFFER_3:
1481 return 512 + 4096 * 3;
1482 case AMDGPUAS::CONSTANT_BUFFER_4:
1483 return 512 + 4096 * 4;
1484 case AMDGPUAS::CONSTANT_BUFFER_5:
1485 return 512 + 4096 * 5;
1486 case AMDGPUAS::CONSTANT_BUFFER_6:
1487 return 512 + 4096 * 6;
1488 case AMDGPUAS::CONSTANT_BUFFER_7:
1489 return 512 + 4096 * 7;
1490 case AMDGPUAS::CONSTANT_BUFFER_8:
1491 return 512 + 4096 * 8;
1492 case AMDGPUAS::CONSTANT_BUFFER_9:
1493 return 512 + 4096 * 9;
1494 case AMDGPUAS::CONSTANT_BUFFER_10:
1495 return 512 + 4096 * 10;
1496 case AMDGPUAS::CONSTANT_BUFFER_11:
1497 return 512 + 4096 * 11;
1498 case AMDGPUAS::CONSTANT_BUFFER_12:
1499 return 512 + 4096 * 12;
1500 case AMDGPUAS::CONSTANT_BUFFER_13:
1501 return 512 + 4096 * 13;
1502 case AMDGPUAS::CONSTANT_BUFFER_14:
1503 return 512 + 4096 * 14;
1504 case AMDGPUAS::CONSTANT_BUFFER_15:
1505 return 512 + 4096 * 15;
1506 default:
1507 return -1;
1508 }
1509}
1510
/// Custom lowering for ISD::LOAD on R600.
///
/// Tries, in order:
///  1. the generic AMDGPU lowering,
///  2. splitting vector loads from local memory,
///  3. folding loads from constant buffers into CONST_ADDRESS nodes,
///  4. manually expanding SEXT loads (only legal from CONSTANT_BUFFER_0),
///  5. expanding private-address loads into REGISTER_LOAD-based indirect
///     addressing.
/// Any load not handled by one of these paths returns SDValue().
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  // First give the generic AMDGPU lowering a chance; if it produced a node,
  // merge it with the original chain and we are done.
  SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
  if (Ret.getNode()) {
    SDValue Ops[2] = {
      Ret,
      Chain
    };
    return DAG.getMergeValues(Ops, DL);
  }


  // Vector loads from local memory are split into smaller loads.
  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      SplitVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Loads from a constant buffer (non-extending or zero-extending only) can
  // be folded to CONST_ADDRESS nodes, which read the kcache.
  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
        isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT,
                           makeArrayRef(Slots, NumElements));
    } else {
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    // A scalar load of a constant buffer yields element 0 of the v4i32 read.
    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    // Expand as zero/any-extended load followed by shl + sra to replicate the
    // sign bit into the high bits.
    SDValue ShiftAmount =
        DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                  LoadNode->getPointerInfo(), MemVT,
                                  LoadNode->isVolatile(),
                                  LoadNode->isNonTemporal(),
                                  LoadNode->getAlignment());
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);

    SDValue MergedValues[2] = { Sra, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Everything below handles only private (stack) loads.
  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    // Emit one REGISTER_LOAD per element, advancing the register index as
    // dictated by the stack layout.
    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, MVT::i32),
                             Op.getOperand(2));
    }
    // Pad the result out to a full 4-element vector with undefs.
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2] = {
    LoweredLoad,
    Chain
  };

  return DAG.getMergeValues(Ops, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001659
Matt Arsenault1d555c42014-06-23 18:00:55 +00001660SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1661 SDValue Chain = Op.getOperand(0);
1662 SDValue Cond = Op.getOperand(1);
1663 SDValue Jump = Op.getOperand(2);
1664
1665 return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1666 Chain, Jump, Cond);
1667}
1668
Tom Stellard75aadc22012-12-11 21:25:42 +00001669/// XXX Only kernel functions are supported, so we can assume for now that
1670/// every function is a kernel function, but in the future we should use
1671/// separate calling conventions for kernel and non-kernel functions.
/// Lower incoming formal arguments.
///
/// Non-compute shaders receive their inputs pre-loaded in 128-bit registers;
/// compute shaders read their arguments from the implicit input buffer in
/// CONSTANT_BUFFER_0 (the first 36 bytes of which hold thread-group /
/// global-size information, hence the 36-byte bias on the offsets).
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      SDLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned ShaderType = MF.getInfo<R600MachineFunctionInfo>()->getShaderType();

  SmallVector<ISD::InputArg, 8> LocalIns;

  // Recover the original (pre-legalization) argument types so the memory
  // loads below use the in-buffer type, not the legalized register type.
  getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);

  AnalyzeFormalArguments(CCInfo, LocalIns);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    EVT VT = Ins[i].VT;         // Legalized register type.
    EVT MemVT = LocalIns[i].VT; // Original in-memory type.

    // Graphics shaders: arguments arrive in live-in 128-bit registers.
    if (ShaderType != ShaderType::COMPUTE) {
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    // Compute shaders: arguments are loaded from constant buffer 0.
    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
    // being invalid. Somehow this seems to work with i64 arguments, but breaks
    // for <1 x i64>.

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.

    // FIXME: This should really check the extload type, but the handling of
    // extload vector parameters seems to be broken.
    //ISD::LoadExtType Ext = Ins[i].Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
    ISD::LoadExtType Ext = ISD::SEXTLOAD;
    SDValue Arg = DAG.getExtLoad(Ext, DL, VT, Chain,
                                 DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
                                 MachinePointerInfo(UndefValue::get(PtrTy)),
                                 MemVT, false, false, 4);

    // 4 is the preferred alignment for the CONSTANT memory space.
    InVals.push_back(Arg);
  }
  return Chain;
}
1728
Matt Arsenault758659232013-05-18 00:21:46 +00001729EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001730 if (!VT.isVector())
1731 return MVT::i32;
Tom Stellard75aadc22012-12-11 21:25:42 +00001732 return VT.changeVectorElementTypeToInteger();
1733}
1734
Matt Arsenault209a7b92014-04-18 07:40:20 +00001735static SDValue CompactSwizzlableVector(
1736 SelectionDAG &DAG, SDValue VectorEntry,
1737 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001738 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1739 assert(RemapSwizzle.empty());
1740 SDValue NewBldVec[4] = {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001741 VectorEntry.getOperand(0),
1742 VectorEntry.getOperand(1),
1743 VectorEntry.getOperand(2),
1744 VectorEntry.getOperand(3)
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001745 };
1746
1747 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeunefa58a5f2013-10-13 17:56:10 +00001748 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1749 // We mask write here to teach later passes that the ith element of this
1750 // vector is undef. Thus we can use it to reduce 128 bits reg usage,
1751 // break false dependencies and additionnaly make assembly easier to read.
1752 RemapSwizzle[i] = 7; // SEL_MASK_WRITE
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001753 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1754 if (C->isZero()) {
1755 RemapSwizzle[i] = 4; // SEL_0
1756 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1757 } else if (C->isExactlyValue(1.0)) {
1758 RemapSwizzle[i] = 5; // SEL_1
1759 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1760 }
1761 }
1762
1763 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1764 continue;
1765 for (unsigned j = 0; j < i; j++) {
1766 if (NewBldVec[i] == NewBldVec[j]) {
1767 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1768 RemapSwizzle[i] = j;
1769 break;
1770 }
1771 }
1772 }
1773
1774 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001775 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001776}
1777
Benjamin Kramer193960c2013-06-11 13:32:25 +00001778static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1779 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001780 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1781 assert(RemapSwizzle.empty());
1782 SDValue NewBldVec[4] = {
1783 VectorEntry.getOperand(0),
1784 VectorEntry.getOperand(1),
1785 VectorEntry.getOperand(2),
1786 VectorEntry.getOperand(3)
1787 };
1788 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001789 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001790 RemapSwizzle[i] = i;
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001791 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1792 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1793 ->getZExtValue();
1794 if (i == Idx)
1795 isUnmovable[Idx] = true;
1796 }
1797 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001798
1799 for (unsigned i = 0; i < 4; i++) {
1800 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1801 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1802 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001803 if (isUnmovable[Idx])
1804 continue;
1805 // Swap i and Idx
1806 std::swap(NewBldVec[Idx], NewBldVec[i]);
1807 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1808 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001809 }
1810 }
1811
1812 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001813 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001814}
1815
1816
1817SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1818SDValue Swz[4], SelectionDAG &DAG) const {
1819 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1820 // Old -> New swizzle values
1821 DenseMap<unsigned, unsigned> SwizzleRemap;
1822
1823 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1824 for (unsigned i = 0; i < 4; i++) {
1825 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1826 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1827 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1828 }
1829
1830 SwizzleRemap.clear();
1831 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1832 for (unsigned i = 0; i < 4; i++) {
1833 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1834 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1835 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1836 }
1837
1838 return BuildVector;
1839}
1840
1841
Tom Stellard75aadc22012-12-11 21:25:42 +00001842//===----------------------------------------------------------------------===//
1843// Custom DAG Optimizations
1844//===----------------------------------------------------------------------===//
1845
/// R600-specific DAG combines; any opcode not handled here is forwarded to
/// the generic AMDGPU combiner.
SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
  case ISD::FP_ROUND: {
      SDValue Arg = N->getOperand(0);
      if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
        return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
                           Arg.getOperand(0));
      }
      break;
    }

  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
  // (i32 select_cc f32, f32, -1, 0 cc)
  //
  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
  // this to one of the SET*_DX10 instructions.
  case ISD::FP_TO_SINT: {
    SDValue FNeg = N->getOperand(0);
    if (FNeg.getOpcode() != ISD::FNEG) {
      return SDValue();
    }
    SDValue SelectCC = FNeg.getOperand(0);
    if (SelectCC.getOpcode() != ISD::SELECT_CC ||
        SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
        SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
        !isHWTrueValue(SelectCC.getOperand(2)) ||
        !isHWFalseValue(SelectCC.getOperand(3))) {
      return SDValue();
    }

    return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
                           SelectCC.getOperand(0), // LHS
                           SelectCC.getOperand(1), // RHS
                           DAG.getConstant(-1, MVT::i32), // True
                           DAG.getConstant(0, MVT::i32), // Flase
                           SelectCC.getOperand(4)); // CC

    // NOTE(review): unreachable — the return above is unconditional.
    break;
  }

  // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
  // => build_vector elt0, ... , NewEltIdx, ... , eltN
  case ISD::INSERT_VECTOR_ELT: {
    SDValue InVec = N->getOperand(0);
    SDValue InVal = N->getOperand(1);
    SDValue EltNo = N->getOperand(2);
    SDLoc dl(N);

    // If the inserted element is an UNDEF, just use the input vector.
    if (InVal.getOpcode() == ISD::UNDEF)
      return InVec;

    EVT VT = InVec.getValueType();

    // If we can't generate a legal BUILD_VECTOR, exit
    if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
      return SDValue();

    // Check that we know which element is being inserted
    if (!isa<ConstantSDNode>(EltNo))
      return SDValue();
    unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

    // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
    // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
    // vector elements.
    SmallVector<SDValue, 8> Ops;
    if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
      Ops.append(InVec.getNode()->op_begin(),
                 InVec.getNode()->op_end());
    } else if (InVec.getOpcode() == ISD::UNDEF) {
      unsigned NElts = VT.getVectorNumElements();
      Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
    } else {
      return SDValue();
    }

    // Insert the element
    if (Elt < Ops.size()) {
      // All the operands of BUILD_VECTOR must have the same type;
      // we enforce that here.
      EVT OpVT = Ops[0].getValueType();
      if (InVal.getValueType() != OpVT)
        InVal = OpVT.bitsGT(InVal.getValueType()) ?
          DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
          DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
      Ops[Elt] = InVal;
    }

    // Return the new vector
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
  }

  // Extract_vec (Build_vector) generated by custom lowering
  // also needs to be customly combined
  case ISD::EXTRACT_VECTOR_ELT: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return Arg->getOperand(Element);
      }
    }
    if (Arg.getOpcode() == ISD::BITCAST &&
        Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
            Arg->getOperand(0).getOperand(Element));
      }
    }
  }
  // NOTE(review): there is no break above, so an EXTRACT_VECTOR_ELT that was
  // not combined falls through into the SELECT_CC case below (which then
  // runs the generic combine and otherwise returns SDValue()).  Confirm this
  // fall-through is intentional before restructuring.

  case ISD::SELECT_CC: {
    // Try common optimizations
    SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
    if (Ret.getNode())
      return Ret;

    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
    // selectcc x, y, a, b, inv(cc)
    //
    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
    // selectcc x, y, a, b, cc
    SDValue LHS = N->getOperand(0);
    if (LHS.getOpcode() != ISD::SELECT_CC) {
      return SDValue();
    }

    SDValue RHS = N->getOperand(1);
    SDValue True = N->getOperand(2);
    SDValue False = N->getOperand(3);
    ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();

    // The inner select must produce exactly the outer's (True, False) pair
    // and the outer must compare against the inner's False value.
    if (LHS.getOperand(2).getNode() != True.getNode() ||
        LHS.getOperand(3).getNode() != False.getNode() ||
        RHS.getNode() != False.getNode()) {
      return SDValue();
    }

    switch (NCC) {
    default: return SDValue();
    case ISD::SETNE: return LHS;
    case ISD::SETEQ: {
      ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
      LHSCC = ISD::getSetCCInverse(LHSCC,
                                  LHS.getOperand(0).getValueType().isInteger());
      // Only fold when the inverted condition code is legal (or we are still
      // before operation legalization).
      if (DCI.isBeforeLegalizeOps() ||
          isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
        return DAG.getSelectCC(SDLoc(N),
                               LHS.getOperand(0),
                               LHS.getOperand(1),
                               LHS.getOperand(2),
                               LHS.getOperand(3),
                               LHSCC);
      break;
    }
    }
    return SDValue();
  }

  // Optimize the swizzle operands of exports feeding a BUILD_VECTOR.
  case AMDGPUISD::EXPORT: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    SDValue NewArgs[8] = {
      N->getOperand(0), // Chain
      SDValue(),
      N->getOperand(2), // ArrayBase
      N->getOperand(3), // Type
      N->getOperand(4), // SWZ_X
      N->getOperand(5), // SWZ_Y
      N->getOperand(6), // SWZ_Z
      N->getOperand(7) // SWZ_W
    };
    SDLoc DL(N);
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
    return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
  }
  // Same swizzle optimization for texture fetches (operands 2..18 hold the
  // coordinate swizzles and sampler/resource parameters).
  case AMDGPUISD::TEXTURE_FETCH: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    SDValue NewArgs[19] = {
      N->getOperand(0),
      N->getOperand(1),
      N->getOperand(2),
      N->getOperand(3),
      N->getOperand(4),
      N->getOperand(5),
      N->getOperand(6),
      N->getOperand(7),
      N->getOperand(8),
      N->getOperand(9),
      N->getOperand(10),
      N->getOperand(11),
      N->getOperand(12),
      N->getOperand(13),
      N->getOperand(14),
      N->getOperand(15),
      N->getOperand(16),
      N->getOperand(17),
      N->getOperand(18),
    };
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
    return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
        NewArgs);
  }
  }

  return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002065
/// Try to fold the producer of source operand \p SrcIdx of \p ParentNode
/// into the operand itself after instruction selection.
///
/// Handles: FNEG/FABS (folded into the neg/abs modifier operands), constant
/// copies (folded into the const-select operand when the kcache read limits
/// allow it), and move-immediates (folded into an inline constant register
/// or the literal operand).
///
/// \p Src/\p Neg/\p Abs/\p Sel/\p Imm are in-out references to the parent's
/// operands; a null-node reference means the parent has no such operand and
/// the corresponding fold is unavailable.  Returns true and updates the
/// references if a fold was performed.
static bool
FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
            SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
  if (!Src.isMachineOpcode())
    return false;
  switch (Src.getMachineOpcode()) {
  case AMDGPU::FNEG_R600:
    // Fold the negation into the neg modifier, if the parent has one.
    if (!Neg.getNode())
      return false;
    Src = Src.getOperand(0);
    Neg = DAG.getTargetConstant(1, MVT::i32);
    return true;
  case AMDGPU::FABS_R600:
    // Fold the absolute value into the abs modifier, if the parent has one.
    if (!Abs.getNode())
      return false;
    Src = Src.getOperand(0);
    Abs = DAG.getTargetConstant(1, MVT::i32);
    return true;
  case AMDGPU::CONST_COPY: {
    unsigned Opcode = ParentNode->getMachineOpcode();
    bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;

    if (!Sel.getNode())
      return false;

    SDValue CstOffset = Src.getOperand(0);
    if (ParentNode->getValueType(0).isVector())
      return false;

    // Gather constants values
    int SrcIndices[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
    };
    std::vector<unsigned> Consts;
    for (int OtherSrcIdx : SrcIndices) {
      int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
      if (OtherSrcIdx < 0 || OtherSelIdx < 0)
        continue;
      if (HasDst) {
        // Operand indices from TII are off by one relative to the SDNode's
        // operand list when the instruction defines a result.
        OtherSrcIdx--;
        OtherSelIdx--;
      }
      if (RegisterSDNode *Reg =
          dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
        if (Reg->getReg() == AMDGPU::ALU_CONST) {
          ConstantSDNode *Cst
            = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
          Consts.push_back(Cst->getZExtValue());
        }
      }
    }

    // Include the constant we are about to fold and verify the hardware's
    // constant-read limits are still respected.
    ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
    Consts.push_back(Cst->getZExtValue());
    if (!TII->fitsConstReadLimitations(Consts)) {
      return false;
    }

    Sel = CstOffset;
    Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
    return true;
  }
  case AMDGPU::MOV_IMM_I32:
  case AMDGPU::MOV_IMM_F32: {
    unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
    uint64_t ImmValue = 0;


    if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
      // 0.0, 0.5 and 1.0 have dedicated inline-constant registers; anything
      // else must go through the literal slot.
      ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
      float FloatValue = FPC->getValueAPF().convertToFloat();
      if (FloatValue == 0.0) {
        ImmReg = AMDGPU::ZERO;
      } else if (FloatValue == 0.5) {
        ImmReg = AMDGPU::HALF;
      } else if (FloatValue == 1.0) {
        ImmReg = AMDGPU::ONE;
      } else {
        ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
      }
    } else {
      // Integer 0 and 1 likewise have inline-constant registers.
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
      uint64_t Value = C->getZExtValue();
      if (Value == 0) {
        ImmReg = AMDGPU::ZERO;
      } else if (Value == 1) {
        ImmReg = AMDGPU::ONE_INT;
      } else {
        ImmValue = Value;
      }
    }

    // Check that we aren't already using an immediate.
    // XXX: It's possible for an instruction to have more than one
    // immediate operand, but this is not supported yet.
    if (ImmReg == AMDGPU::ALU_LITERAL_X) {
      if (!Imm.getNode())
        return false;
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
      assert(C);
      if (C->getZExtValue())
        return false;
      Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
    }
    Src = DAG.getRegister(ImmReg, MVT::i32);
    return true;
  }
  default:
    return false;
  }
}
2189
2190
2191/// \brief Fold the instructions after selecting them
2192SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2193 SelectionDAG &DAG) const {
2194 const R600InstrInfo *TII =
2195 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
2196 if (!Node->isMachineOpcode())
2197 return Node;
2198 unsigned Opcode = Node->getMachineOpcode();
2199 SDValue FakeOp;
2200
2201 std::vector<SDValue> Ops;
Craig Topper66e588b2014-06-29 00:40:57 +00002202 for (const SDUse &I : Node->ops())
2203 Ops.push_back(I);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002204
2205 if (Opcode == AMDGPU::DOT_4) {
2206 int OperandIdx[] = {
2207 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2208 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2209 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2210 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2211 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2212 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2213 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2214 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00002215 };
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002216 int NegIdx[] = {
2217 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
2218 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
2219 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
2220 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
2221 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
2222 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
2223 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
2224 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
2225 };
2226 int AbsIdx[] = {
2227 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
2228 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
2229 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
2230 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
2231 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
2232 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
2233 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
2234 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
2235 };
2236 for (unsigned i = 0; i < 8; i++) {
2237 if (OperandIdx[i] < 0)
2238 return Node;
2239 SDValue &Src = Ops[OperandIdx[i] - 1];
2240 SDValue &Neg = Ops[NegIdx[i] - 1];
2241 SDValue &Abs = Ops[AbsIdx[i] - 1];
2242 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2243 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2244 if (HasDst)
2245 SelIdx--;
2246 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002247 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2248 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2249 }
2250 } else if (Opcode == AMDGPU::REG_SEQUENCE) {
2251 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2252 SDValue &Src = Ops[i];
2253 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002254 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2255 }
Vincent Lejeune0167a312013-09-12 23:45:00 +00002256 } else if (Opcode == AMDGPU::CLAMP_R600) {
2257 SDValue Src = Node->getOperand(0);
2258 if (!Src.isMachineOpcode() ||
2259 !TII->hasInstrModifiers(Src.getMachineOpcode()))
2260 return Node;
2261 int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
2262 AMDGPU::OpName::clamp);
2263 if (ClampIdx < 0)
2264 return Node;
2265 std::vector<SDValue> Ops;
2266 unsigned NumOp = Src.getNumOperands();
2267 for(unsigned i = 0; i < NumOp; ++i)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00002268 Ops.push_back(Src.getOperand(i));
Vincent Lejeune0167a312013-09-12 23:45:00 +00002269 Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
2270 return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
2271 Node->getVTList(), Ops);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002272 } else {
2273 if (!TII->hasInstrModifiers(Opcode))
2274 return Node;
2275 int OperandIdx[] = {
2276 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2277 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2278 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
2279 };
2280 int NegIdx[] = {
2281 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
2282 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
2283 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
2284 };
2285 int AbsIdx[] = {
2286 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
2287 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
2288 -1
2289 };
2290 for (unsigned i = 0; i < 3; i++) {
2291 if (OperandIdx[i] < 0)
2292 return Node;
2293 SDValue &Src = Ops[OperandIdx[i] - 1];
2294 SDValue &Neg = Ops[NegIdx[i] - 1];
2295 SDValue FakeAbs;
2296 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2297 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2298 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002299 int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
2300 if (HasDst) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002301 SelIdx--;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002302 ImmIdx--;
2303 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002304 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002305 SDValue &Imm = Ops[ImmIdx];
2306 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002307 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2308 }
2309 }
2310
2311 return Node;
2312}