blob: 8c083262f939548e42716b736b86893b77bde22a [file] [log] [blame]
Tom Stellard75aadc22012-12-11 21:25:42 +00001//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Custom DAG lowering for R600
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000016#include "AMDGPUFrameLowering.h"
Matt Arsenaultc791f392014-06-23 18:00:31 +000017#include "AMDGPUIntrinsicInfo.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000018#include "AMDGPUSubtarget.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000019#include "R600Defines.h"
20#include "R600InstrInfo.h"
21#include "R600MachineFunctionInfo.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000022#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000023#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000024#include "llvm/CodeGen/MachineInstrBuilder.h"
25#include "llvm/CodeGen/MachineRegisterInfo.h"
26#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000027#include "llvm/IR/Argument.h"
28#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000029
30using namespace llvm;
31
// Configure R600 lowering: register classes for the legal value types,
// per-operation legalization actions, and the target DAG combines.
// Gen caches the subtarget's hardware generation for later lowering
// decisions.
R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
  // 32-bit scalars use the 32-bit register class; 2- and 4-element vectors
  // use the 64- and 128-bit classes respectively.
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
  addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);

  computeRegisterProperties();

  // Set condition code actions.  Codes marked Expand are rewritten by the
  // legalizer in terms of the condition codes left selectable.
  setCondCodeAction(ISD::SETO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::f32, Expand);

  setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::i32, Expand);

  // FCOS/FSIN are custom lowered (see LowerTrig via LowerOperation).
  setOperationAction(ISD::FCOS, MVT::f32, Custom);
  setOperationAction(ISD::FSIN, MVT::f32, Custom);

  setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
  setOperationAction(ISD::SETCC, MVT::v2i32, Expand);

  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  // Intrinsics are dispatched in LowerOperation.
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);

  // Plain SELECT is expanded; SELECT_CC above is custom lowered instead.
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
  setOperationAction(ISD::SELECT, MVT::v4i32, Expand);

  // Expand sign extension of vectors.  Scalar SIGN_EXTEND_INREG stays
  // selectable when the subtarget has BFE (bit-field extract).
  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);


  // Legalize loads and stores to the private address space.
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);

  // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
  // spaces, so it is custom lowered to handle those where it isn't.
  setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
  setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
  setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);

  setOperationAction(ISD::STORE, MVT::i8, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);
  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
  setTruncStoreAction(MVT::i32, MVT::i16, Custom);

  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);

  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);

  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

  // Target-specific DAG combines (implemented in PerformDAGCombine).
  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::FP_TO_SINT);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::SELECT_CC);
  setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);

  setOperationAction(ISD::SUB, MVT::i64, Expand);

  // These should be replaced by UDVIREM, but it does not happen automatically
  // during Type Legalization
  setOperationAction(ISD::UDIV, MVT::i64, Custom);
  setOperationAction(ISD::UREM, MVT::i64, Custom);
  setOperationAction(ISD::SDIV, MVT::i64, Custom);
  setOperationAction(ISD::SREM, MVT::i64, Custom);

  // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
  // to be Legal/Custom in order to avoid library calls.
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);

  // Booleans (scalar and vector) are all-ones for true, all-zeros for false.
  setBooleanContents(ZeroOrNegativeOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
  setSchedulingPreference(Sched::Source);
}
176
/// \brief Expand R600 pseudo instructions that require custom MachineInstr
/// insertion.
///
/// Handles the modifier-flag moves (CLAMP/FABS/FNEG), immediate moves,
/// constant copies, cacheless RAT writes, the TXD/TXD_SHADOW texture
/// gradient macros, branches, exports and RETURN.  Unless a case returns
/// early, the original pseudo \p MI is erased and \p BB is returned.
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // Replace LDS_*_RET instruction that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI->getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // If the destination register has uses, keep the _RET form unchanged.
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()))
        return BB;

      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      // Copy all operands except the (unused) destination at index 0.
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  // CLAMP/FABS/FNEG become a plain MOV carrying the matching
  // output/input-modifier flag.
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    // Set the MASK flag on the instruction that defines the masked register.
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    // Materialize the FP immediate via its raw bit pattern.
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    // Copy from the constant file: a MOV from ALU_CONST with the constant
    // selector encoded in the src0_sel operand.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
                       MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    unsigned EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    // Expand the gradient sample macro: load the H and V gradients into two
    // temporary 128-bit registers, then issue the gradient sample that
    // implicitly reads them.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    // Adjust the source swizzle and coordinate-type bits per texture target.
    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    // Same expansion as TXD, but samples with the shadow-compare opcode.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    // Conditional branch: a PRED_X sets PREDICATE_BIT (with the PUSH flag),
    // then a JUMP_COND consumes and kills it.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    // Same as the f32 form but using the integer is-not-zero predicate.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO_INT)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if its not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    // Scan forward for a later export of the same type in this block.
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // CF instruction encoding differs between Evergreen (84) and R600 (40).
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  // The pseudo has been expanded (or intentionally dropped); remove it.
  MI->eraseFromParent();
  return BB;
}
553
554//===----------------------------------------------------------------------===//
555// Custom DAG Lowering Operations
556//===----------------------------------------------------------------------===//
557
Tom Stellard75aadc22012-12-11 21:25:42 +0000558SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000559 MachineFunction &MF = DAG.getMachineFunction();
560 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard75aadc22012-12-11 21:25:42 +0000561 switch (Op.getOpcode()) {
562 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Tom Stellard880a80a2014-06-17 16:53:14 +0000563 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
564 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
Jan Vesely25f36272014-06-18 12:27:13 +0000565 case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
Jan Veselyecf51332014-06-18 12:27:17 +0000566 case ISD::SRA_PARTS:
Jan Vesely900ff2e2014-06-18 12:27:15 +0000567 case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000568 case ISD::FCOS:
569 case ISD::FSIN: return LowerTrig(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000570 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000571 case ISD::STORE: return LowerSTORE(Op, DAG);
Tom Stellard365366f2013-01-23 02:09:06 +0000572 case ISD::LOAD: return LowerLOAD(Op, DAG);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000573 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000574 case ISD::INTRINSIC_VOID: {
575 SDValue Chain = Op.getOperand(0);
576 unsigned IntrinsicID =
577 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
578 switch (IntrinsicID) {
579 case AMDGPUIntrinsic::AMDGPU_store_output: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000580 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
581 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000582 MFI->LiveOuts.push_back(Reg);
Andrew Trickef9de2a2013-05-25 02:42:55 +0000583 return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
Tom Stellard75aadc22012-12-11 21:25:42 +0000584 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000585 case AMDGPUIntrinsic::R600_store_swizzle: {
586 const SDValue Args[8] = {
587 Chain,
588 Op.getOperand(2), // Export Value
589 Op.getOperand(3), // ArrayBase
590 Op.getOperand(4), // Type
591 DAG.getConstant(0, MVT::i32), // SWZ_X
592 DAG.getConstant(1, MVT::i32), // SWZ_Y
593 DAG.getConstant(2, MVT::i32), // SWZ_Z
594 DAG.getConstant(3, MVT::i32) // SWZ_W
595 };
Craig Topper48d114b2014-04-26 18:35:24 +0000596 return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(), Args);
Tom Stellard75aadc22012-12-11 21:25:42 +0000597 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000598
Tom Stellard75aadc22012-12-11 21:25:42 +0000599 // default for switch(IntrinsicID)
600 default: break;
601 }
602 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
603 break;
604 }
605 case ISD::INTRINSIC_WO_CHAIN: {
606 unsigned IntrinsicID =
607 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
608 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000609 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000610 switch(IntrinsicID) {
611 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000612 case AMDGPUIntrinsic::R600_load_input: {
613 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
614 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
615 MachineFunction &MF = DAG.getMachineFunction();
616 MachineRegisterInfo &MRI = MF.getRegInfo();
617 MRI.addLiveIn(Reg);
618 return DAG.getCopyFromReg(DAG.getEntryNode(),
619 SDLoc(DAG.getEntryNode()), Reg, VT);
620 }
621
622 case AMDGPUIntrinsic::R600_interp_input: {
623 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
624 int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
625 MachineSDNode *interp;
626 if (ijb < 0) {
627 const MachineFunction &MF = DAG.getMachineFunction();
628 const R600InstrInfo *TII =
629 static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
630 interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
631 MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
632 return DAG.getTargetExtractSubreg(
633 TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
634 DL, MVT::f32, SDValue(interp, 0));
635 }
636 MachineFunction &MF = DAG.getMachineFunction();
637 MachineRegisterInfo &MRI = MF.getRegInfo();
638 unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
639 unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
640 MRI.addLiveIn(RegisterI);
641 MRI.addLiveIn(RegisterJ);
642 SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
643 SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
644 SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
645 SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);
646
647 if (slot % 4 < 2)
648 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
649 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
650 RegisterJNode, RegisterINode);
651 else
652 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
653 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
654 RegisterJNode, RegisterINode);
655 return SDValue(interp, slot % 2);
656 }
Vincent Lejeunef143af32013-11-11 22:10:24 +0000657 case AMDGPUIntrinsic::R600_interp_xy:
658 case AMDGPUIntrinsic::R600_interp_zw: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000659 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Tom Stellard41afe6a2013-02-05 17:09:14 +0000660 MachineSDNode *interp;
Vincent Lejeunef143af32013-11-11 22:10:24 +0000661 SDValue RegisterINode = Op.getOperand(2);
662 SDValue RegisterJNode = Op.getOperand(3);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000663
Vincent Lejeunef143af32013-11-11 22:10:24 +0000664 if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
Tom Stellard41afe6a2013-02-05 17:09:14 +0000665 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
Vincent Lejeunef143af32013-11-11 22:10:24 +0000666 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000667 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000668 else
669 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
Vincent Lejeunef143af32013-11-11 22:10:24 +0000670 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000671 RegisterJNode, RegisterINode);
Vincent Lejeunef143af32013-11-11 22:10:24 +0000672 return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
673 SDValue(interp, 0), SDValue(interp, 1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000674 }
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000675 case AMDGPUIntrinsic::R600_tex:
676 case AMDGPUIntrinsic::R600_texc:
677 case AMDGPUIntrinsic::R600_txl:
678 case AMDGPUIntrinsic::R600_txlc:
679 case AMDGPUIntrinsic::R600_txb:
680 case AMDGPUIntrinsic::R600_txbc:
681 case AMDGPUIntrinsic::R600_txf:
682 case AMDGPUIntrinsic::R600_txq:
683 case AMDGPUIntrinsic::R600_ddx:
Vincent Lejeune6df39432013-10-02 16:00:33 +0000684 case AMDGPUIntrinsic::R600_ddy:
685 case AMDGPUIntrinsic::R600_ldptr: {
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000686 unsigned TextureOp;
687 switch (IntrinsicID) {
688 case AMDGPUIntrinsic::R600_tex:
689 TextureOp = 0;
690 break;
691 case AMDGPUIntrinsic::R600_texc:
692 TextureOp = 1;
693 break;
694 case AMDGPUIntrinsic::R600_txl:
695 TextureOp = 2;
696 break;
697 case AMDGPUIntrinsic::R600_txlc:
698 TextureOp = 3;
699 break;
700 case AMDGPUIntrinsic::R600_txb:
701 TextureOp = 4;
702 break;
703 case AMDGPUIntrinsic::R600_txbc:
704 TextureOp = 5;
705 break;
706 case AMDGPUIntrinsic::R600_txf:
707 TextureOp = 6;
708 break;
709 case AMDGPUIntrinsic::R600_txq:
710 TextureOp = 7;
711 break;
712 case AMDGPUIntrinsic::R600_ddx:
713 TextureOp = 8;
714 break;
715 case AMDGPUIntrinsic::R600_ddy:
716 TextureOp = 9;
717 break;
Vincent Lejeune6df39432013-10-02 16:00:33 +0000718 case AMDGPUIntrinsic::R600_ldptr:
719 TextureOp = 10;
720 break;
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000721 default:
722 llvm_unreachable("Unknow Texture Operation");
723 }
724
725 SDValue TexArgs[19] = {
726 DAG.getConstant(TextureOp, MVT::i32),
727 Op.getOperand(1),
728 DAG.getConstant(0, MVT::i32),
729 DAG.getConstant(1, MVT::i32),
730 DAG.getConstant(2, MVT::i32),
731 DAG.getConstant(3, MVT::i32),
732 Op.getOperand(2),
733 Op.getOperand(3),
734 Op.getOperand(4),
735 DAG.getConstant(0, MVT::i32),
736 DAG.getConstant(1, MVT::i32),
737 DAG.getConstant(2, MVT::i32),
738 DAG.getConstant(3, MVT::i32),
739 Op.getOperand(5),
740 Op.getOperand(6),
741 Op.getOperand(7),
742 Op.getOperand(8),
743 Op.getOperand(9),
744 Op.getOperand(10)
745 };
Craig Topper48d114b2014-04-26 18:35:24 +0000746 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000747 }
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000748 case AMDGPUIntrinsic::AMDGPU_dp4: {
749 SDValue Args[8] = {
750 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
751 DAG.getConstant(0, MVT::i32)),
752 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
753 DAG.getConstant(0, MVT::i32)),
754 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
755 DAG.getConstant(1, MVT::i32)),
756 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
757 DAG.getConstant(1, MVT::i32)),
758 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
759 DAG.getConstant(2, MVT::i32)),
760 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
761 DAG.getConstant(2, MVT::i32)),
762 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
763 DAG.getConstant(3, MVT::i32)),
764 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
765 DAG.getConstant(3, MVT::i32))
766 };
Craig Topper48d114b2014-04-26 18:35:24 +0000767 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000768 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000769
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000770 case Intrinsic::r600_read_ngroups_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000771 return LowerImplicitParameter(DAG, VT, DL, 0);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000772 case Intrinsic::r600_read_ngroups_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000773 return LowerImplicitParameter(DAG, VT, DL, 1);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000774 case Intrinsic::r600_read_ngroups_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000775 return LowerImplicitParameter(DAG, VT, DL, 2);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000776 case Intrinsic::r600_read_global_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000777 return LowerImplicitParameter(DAG, VT, DL, 3);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000778 case Intrinsic::r600_read_global_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000779 return LowerImplicitParameter(DAG, VT, DL, 4);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000780 case Intrinsic::r600_read_global_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000781 return LowerImplicitParameter(DAG, VT, DL, 5);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000782 case Intrinsic::r600_read_local_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000783 return LowerImplicitParameter(DAG, VT, DL, 6);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000784 case Intrinsic::r600_read_local_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000785 return LowerImplicitParameter(DAG, VT, DL, 7);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000786 case Intrinsic::r600_read_local_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000787 return LowerImplicitParameter(DAG, VT, DL, 8);
788
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000789 case Intrinsic::r600_read_tgid_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000790 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
791 AMDGPU::T1_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000792 case Intrinsic::r600_read_tgid_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000793 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
794 AMDGPU::T1_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000795 case Intrinsic::r600_read_tgid_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000796 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
797 AMDGPU::T1_Z, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000798 case Intrinsic::r600_read_tidig_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000799 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
800 AMDGPU::T0_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000801 case Intrinsic::r600_read_tidig_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000802 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
803 AMDGPU::T0_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000804 case Intrinsic::r600_read_tidig_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000805 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
806 AMDGPU::T0_Z, VT);
807 }
808 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
809 break;
810 }
811 } // end switch(Op.getOpcode())
812 return SDValue();
813}
814
/// Replace the illegal/custom-widened results of \p N with legal nodes.
///
/// Handles FP_TO_UINT, LOAD, STORE, the scalar division/remainder opcodes
/// (routed through the combined [SU]DIVREM nodes), and a manual expansion of
/// wide UDIVREM.  Everything else is delegated to the generic AMDGPU
/// implementation.
void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
    return;
  // FP_TO_UINT is lowered to a setcc against 0.0 (see LowerFPTOUINT).
  case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
    return;
  case ISD::LOAD: {
    // LowerLOAD produces both the loaded value (result 0) and the chain
    // (result 1).
    SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
    Results.push_back(SDValue(Node, 0));
    Results.push_back(SDValue(Node, 1));
    // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
    // function
    DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
    return;
  }
  case ISD::STORE: {
    SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
    Results.push_back(SDValue(Node, 0));
    return;
  }
  // The four scalar division/remainder opcodes are all expressed through the
  // combined divrem nodes; only the requested result is pushed back.
  case ISD::UDIV: {
    SDValue Op = SDValue(N, 0);
    SDLoc DL(Op);
    EVT VT = Op.getValueType();
    SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT),
      N->getOperand(0), N->getOperand(1));
    Results.push_back(UDIVREM);
    break;
  }
  case ISD::UREM: {
    SDValue Op = SDValue(N, 0);
    SDLoc DL(Op);
    EVT VT = Op.getValueType();
    SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT),
      N->getOperand(0), N->getOperand(1));
    Results.push_back(UDIVREM.getValue(1));
    break;
  }
  case ISD::SDIV: {
    SDValue Op = SDValue(N, 0);
    SDLoc DL(Op);
    EVT VT = Op.getValueType();
    SDValue SDIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(VT, VT),
      N->getOperand(0), N->getOperand(1));
    Results.push_back(SDIVREM);
    break;
  }
  case ISD::SREM: {
    SDValue Op = SDValue(N, 0);
    SDLoc DL(Op);
    EVT VT = Op.getValueType();
    SDValue SDIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(VT, VT),
      N->getOperand(0), N->getOperand(1));
    Results.push_back(SDIVREM.getValue(1));
    break;
  }
  case ISD::SDIVREM: {
    // NOTE(review): this passes result #1 of N to LowerSDIVREM, while the
    // UDIVREM case below uses result #0 — verify this is intentional.
    SDValue Op = SDValue(N, 1);
    SDValue RES = LowerSDIVREM(Op, DAG);
    Results.push_back(RES);
    Results.push_back(RES.getValue(1));
    break;
  }
  case ISD::UDIVREM: {
    // Expand a double-width unsigned divrem in terms of half-width
    // operations using bit-by-bit restoring long division.
    SDValue Op = SDValue(N, 0);
    SDLoc DL(Op);
    EVT VT = Op.getValueType();
    EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());

    SDValue one = DAG.getConstant(1, HalfVT);
    SDValue zero = DAG.getConstant(0, HalfVT);

    //HiLo split
    SDValue LHS = N->getOperand(0);
    SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, zero);
    SDValue LHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, one);

    SDValue RHS = N->getOperand(1);
    SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, zero);
    SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, one);

    // Get Speculative values: valid only when the divisor fits in the low
    // half (RHS_Hi == 0); the selects below fall back otherwise.
    SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo);
    SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo);

    SDValue REM_Hi = zero;
    SDValue REM_Lo = DAG.getSelectCC(DL, RHS_Hi, zero, REM_Part, LHS_Hi, ISD::SETEQ);

    SDValue DIV_Hi = DAG.getSelectCC(DL, RHS_Hi, zero, DIV_Part, zero, ISD::SETEQ);
    SDValue DIV_Lo = zero;

    const unsigned halfBitWidth = HalfVT.getSizeInBits();

    // Shift one bit of LHS_Lo into the running remainder per iteration,
    // subtracting RHS and setting the matching quotient bit when possible.
    for (unsigned i = 0; i < halfBitWidth; ++i) {
      SDValue POS = DAG.getConstant(halfBitWidth - i - 1, HalfVT);
      // Get Value of high bit
      SDValue HBit;
      if (halfBitWidth == 32 && Subtarget->hasBFE()) {
        // Single-bit extract via the hardware bitfield-extract node.
        HBit = DAG.getNode(AMDGPUISD::BFE_U32, DL, HalfVT, LHS_Lo, POS, one);
      } else {
        HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS);
        HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one);
      }

      // Shift the double-wide remainder left by one, carrying the top bit
      // of the low half into the high half, and insert HBit at the bottom.
      SDValue Carry = DAG.getNode(ISD::SRL, DL, HalfVT, REM_Lo,
        DAG.getConstant(halfBitWidth - 1, HalfVT));
      REM_Hi = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Hi, one);
      REM_Hi = DAG.getNode(ISD::OR, DL, HalfVT, REM_Hi, Carry);

      REM_Lo = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Lo, one);
      REM_Lo = DAG.getNode(ISD::OR, DL, HalfVT, REM_Lo, HBit);


      SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);

      // Quotient bit for this position, taken only if REM >= RHS.
      // NOTE(review): `1 << 31` relies on implementation-defined signed
      // conversion; the value is truncated to HalfVT so the constant ends
      // up correct, but `1ULL <<` would be cleaner.
      SDValue BIT = DAG.getConstant(1 << (halfBitWidth - i - 1), HalfVT);
      SDValue realBIT = DAG.getSelectCC(DL, REM, RHS, BIT, zero, ISD::SETGE);

      DIV_Lo = DAG.getNode(ISD::OR, DL, HalfVT, DIV_Lo, realBIT);

      // Update REM

      SDValue REM_sub = DAG.getNode(ISD::SUB, DL, VT, REM, RHS);

      REM = DAG.getSelectCC(DL, REM, RHS, REM_sub, REM, ISD::SETGE);
      REM_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, zero);
      REM_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, one);
    }

    SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
    SDValue DIV = DAG.getNode(ISD::BUILD_PAIR, DL, VT, DIV_Lo, DIV_Hi);
    Results.push_back(DIV);
    Results.push_back(REM);
    break;
  }
  }
}
955
Tom Stellard880a80a2014-06-17 16:53:14 +0000956SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
957 SDValue Vector) const {
958
959 SDLoc DL(Vector);
960 EVT VecVT = Vector.getValueType();
961 EVT EltVT = VecVT.getVectorElementType();
962 SmallVector<SDValue, 8> Args;
963
964 for (unsigned i = 0, e = VecVT.getVectorNumElements();
965 i != e; ++i) {
966 Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
967 Vector, DAG.getConstant(i, getVectorIdxTy())));
968 }
969
970 return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
971}
972
973SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
974 SelectionDAG &DAG) const {
975
976 SDLoc DL(Op);
977 SDValue Vector = Op.getOperand(0);
978 SDValue Index = Op.getOperand(1);
979
980 if (isa<ConstantSDNode>(Index) ||
981 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
982 return Op;
983
984 Vector = vectorToVerticalVector(DAG, Vector);
985 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
986 Vector, Index);
987}
988
989SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
990 SelectionDAG &DAG) const {
991 SDLoc DL(Op);
992 SDValue Vector = Op.getOperand(0);
993 SDValue Value = Op.getOperand(1);
994 SDValue Index = Op.getOperand(2);
995
996 if (isa<ConstantSDNode>(Index) ||
997 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
998 return Op;
999
1000 Vector = vectorToVerticalVector(DAG, Vector);
1001 SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
1002 Vector, Value, Index);
1003 return vectorToVerticalVector(DAG, Insert);
1004}
1005
Vincent Lejeuneb55940c2013-07-09 15:03:11 +00001006SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
1007 // On hw >= R700, COS/SIN input must be between -1. and 1.
1008 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
1009 EVT VT = Op.getValueType();
1010 SDValue Arg = Op.getOperand(0);
1011 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
1012 DAG.getNode(ISD::FADD, SDLoc(Op), VT,
1013 DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
1014 DAG.getConstantFP(0.15915494309, MVT::f32)),
1015 DAG.getConstantFP(0.5, MVT::f32)));
1016 unsigned TrigNode;
1017 switch (Op.getOpcode()) {
1018 case ISD::FCOS:
1019 TrigNode = AMDGPUISD::COS_HW;
1020 break;
1021 case ISD::FSIN:
1022 TrigNode = AMDGPUISD::SIN_HW;
1023 break;
1024 default:
1025 llvm_unreachable("Wrong trig opcode");
1026 }
1027 SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
1028 DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
1029 DAG.getConstantFP(-0.5, MVT::f32)));
1030 if (Gen >= AMDGPUSubtarget::R700)
1031 return TrigVal;
1032 // On R600 hw, COS/SIN input must be between -Pi and Pi.
1033 return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
1034 DAG.getConstantFP(3.14159265359, MVT::f32));
1035}
1036
Jan Vesely25f36272014-06-18 12:27:13 +00001037SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
1038 SDLoc DL(Op);
1039 EVT VT = Op.getValueType();
1040
1041 SDValue Lo = Op.getOperand(0);
1042 SDValue Hi = Op.getOperand(1);
1043 SDValue Shift = Op.getOperand(2);
1044 SDValue Zero = DAG.getConstant(0, VT);
1045 SDValue One = DAG.getConstant(1, VT);
1046
1047 SDValue Width = DAG.getConstant(VT.getSizeInBits(), VT);
1048 SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT);
1049 SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
1050 SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
1051
1052 // The dance around Width1 is necessary for 0 special case.
1053 // Without it the CompShift might be 32, producing incorrect results in
1054 // Overflow. So we do the shift in two steps, the alternative is to
1055 // add a conditional to filter the special case.
1056
1057 SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
1058 Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);
1059
1060 SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
1061 HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
1062 SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);
1063
1064 SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
1065 SDValue LoBig = Zero;
1066
1067 Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
1068 Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
1069
1070 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
1071}
1072
Jan Vesely900ff2e2014-06-18 12:27:15 +00001073SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
1074 SDLoc DL(Op);
1075 EVT VT = Op.getValueType();
1076
1077 SDValue Lo = Op.getOperand(0);
1078 SDValue Hi = Op.getOperand(1);
1079 SDValue Shift = Op.getOperand(2);
1080 SDValue Zero = DAG.getConstant(0, VT);
1081 SDValue One = DAG.getConstant(1, VT);
1082
Jan Veselyecf51332014-06-18 12:27:17 +00001083 const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;
1084
Jan Vesely900ff2e2014-06-18 12:27:15 +00001085 SDValue Width = DAG.getConstant(VT.getSizeInBits(), VT);
1086 SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT);
1087 SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
1088 SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
1089
1090 // The dance around Width1 is necessary for 0 special case.
1091 // Without it the CompShift might be 32, producing incorrect results in
1092 // Overflow. So we do the shift in two steps, the alternative is to
1093 // add a conditional to filter the special case.
1094
1095 SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
1096 Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);
1097
Jan Veselyecf51332014-06-18 12:27:17 +00001098 SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
Jan Vesely900ff2e2014-06-18 12:27:15 +00001099 SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
1100 LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);
1101
Jan Veselyecf51332014-06-18 12:27:17 +00001102 SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
1103 SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;
Jan Vesely900ff2e2014-06-18 12:27:15 +00001104
1105 Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
1106 Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
1107
1108 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
1109}
1110
Tom Stellard75aadc22012-12-11 21:25:42 +00001111SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
1112 return DAG.getNode(
1113 ISD::SETCC,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001114 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +00001115 MVT::i1,
1116 Op, DAG.getConstantFP(0.0f, MVT::f32),
1117 DAG.getCondCode(ISD::SETNE)
1118 );
1119}
1120
Tom Stellard75aadc22012-12-11 21:25:42 +00001121SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001122 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001123 unsigned DwordOffset) const {
1124 unsigned ByteOffset = DwordOffset * 4;
1125 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +00001126 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +00001127
1128 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
1129 assert(isInt<16>(ByteOffset));
1130
1131 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
1132 DAG.getConstant(ByteOffset, MVT::i32), // PTR
1133 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
1134 false, false, false, 0);
1135}
1136
Tom Stellard75aadc22012-12-11 21:25:42 +00001137bool R600TargetLowering::isZero(SDValue Op) const {
1138 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
1139 return Cst->isNullValue();
1140 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
1141 return CstFP->isZero();
1142 } else {
1143 return false;
1144 }
1145}
1146
/// Lower SELECT_CC for R600.
///
/// First tries to rearrange operands so the node matches a native SET*
/// instruction, then a native CND* instruction; if neither form fits, the
/// node is split into two SELECT_CC operations the hardware can handle.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.  If the operands
  // are reversed (hardware-false in the True slot), invert the condition —
  // and if the inverted condition is not legal, additionally swap LHS/RHS.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
     ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        // Inverting the condition also requires swapping True/False.
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    // "not equal" conditions are expressed as the inverted "equal"
    // condition with the True/False operands exchanged.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far, it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
1281
Alp Tokercb402912014-01-24 17:20:08 +00001282/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001283/// convert these pointers to a register index. Each register holds
1284/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
1285/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
1286/// for indirect addressing.
1287SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1288 unsigned StackWidth,
1289 SelectionDAG &DAG) const {
1290 unsigned SRLPad;
1291 switch(StackWidth) {
1292 case 1:
1293 SRLPad = 2;
1294 break;
1295 case 2:
1296 SRLPad = 3;
1297 break;
1298 case 4:
1299 SRLPad = 4;
1300 break;
1301 default: llvm_unreachable("Invalid stack width");
1302 }
1303
Andrew Trickef9de2a2013-05-25 02:42:55 +00001304 return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001305 DAG.getConstant(SRLPad, MVT::i32));
1306}
1307
1308void R600TargetLowering::getStackAddress(unsigned StackWidth,
1309 unsigned ElemIdx,
1310 unsigned &Channel,
1311 unsigned &PtrIncr) const {
1312 switch (StackWidth) {
1313 default:
1314 case 1:
1315 Channel = 0;
1316 if (ElemIdx > 0) {
1317 PtrIncr = 1;
1318 } else {
1319 PtrIncr = 0;
1320 }
1321 break;
1322 case 2:
1323 Channel = ElemIdx % 2;
1324 if (ElemIdx == 2) {
1325 PtrIncr = 1;
1326 } else {
1327 PtrIncr = 0;
1328 }
1329 break;
1330 case 4:
1331 Channel = ElemIdx;
1332 PtrIncr = 0;
1333 break;
1334 }
1335}
1336
Tom Stellard75aadc22012-12-11 21:25:42 +00001337SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001338 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001339 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1340 SDValue Chain = Op.getOperand(0);
1341 SDValue Value = Op.getOperand(1);
1342 SDValue Ptr = Op.getOperand(2);
1343
Tom Stellard2ffc3302013-08-26 15:05:44 +00001344 SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
Tom Stellardfbab8272013-08-16 01:12:11 +00001345 if (Result.getNode()) {
1346 return Result;
1347 }
1348
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001349 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
1350 if (StoreNode->isTruncatingStore()) {
1351 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001352 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001353 EVT MemVT = StoreNode->getMemoryVT();
1354 SDValue MaskConstant;
1355 if (MemVT == MVT::i8) {
1356 MaskConstant = DAG.getConstant(0xFF, MVT::i32);
1357 } else {
1358 assert(MemVT == MVT::i16);
1359 MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
1360 }
1361 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
1362 DAG.getConstant(2, MVT::i32));
1363 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
1364 DAG.getConstant(0x00000003, VT));
1365 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1366 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
1367 DAG.getConstant(3, VT));
1368 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1369 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
1370 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1371 // vector instead.
1372 SDValue Src[4] = {
1373 ShiftedValue,
1374 DAG.getConstant(0, MVT::i32),
1375 DAG.getConstant(0, MVT::i32),
1376 Mask
1377 };
Craig Topper48d114b2014-04-26 18:35:24 +00001378 SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001379 SDValue Args[3] = { Chain, Input, DWordAddr };
1380 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
Craig Topper206fcd42014-04-26 19:29:41 +00001381 Op->getVTList(), Args, MemVT,
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001382 StoreNode->getMemOperand());
1383 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
1384 Value.getValueType().bitsGE(MVT::i32)) {
1385 // Convert pointer from byte address to dword address.
1386 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1387 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
1388 Ptr, DAG.getConstant(2, MVT::i32)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001389
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001390 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
Matt Arsenaulteaa3a7e2013-12-10 21:37:42 +00001391 llvm_unreachable("Truncated and indexed stores not supported yet");
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001392 } else {
1393 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1394 }
1395 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001396 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001397 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001398
1399 EVT ValueVT = Value.getValueType();
1400
1401 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1402 return SDValue();
1403 }
1404
Tom Stellarde9373602014-01-22 19:24:14 +00001405 SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
1406 if (Ret.getNode()) {
1407 return Ret;
1408 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001409 // Lowering for indirect addressing
1410
1411 const MachineFunction &MF = DAG.getMachineFunction();
1412 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1413 getTargetMachine().getFrameLowering());
1414 unsigned StackWidth = TFL->getStackWidth(MF);
1415
1416 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1417
1418 if (ValueVT.isVector()) {
1419 unsigned NumElemVT = ValueVT.getVectorNumElements();
1420 EVT ElemVT = ValueVT.getVectorElementType();
Craig Topper48d114b2014-04-26 18:35:24 +00001421 SmallVector<SDValue, 4> Stores(NumElemVT);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001422
1423 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1424 "vector width in load");
1425
1426 for (unsigned i = 0; i < NumElemVT; ++i) {
1427 unsigned Channel, PtrIncr;
1428 getStackAddress(StackWidth, i, Channel, PtrIncr);
1429 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1430 DAG.getConstant(PtrIncr, MVT::i32));
1431 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
1432 Value, DAG.getConstant(i, MVT::i32));
1433
1434 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1435 Chain, Elem, Ptr,
1436 DAG.getTargetConstant(Channel, MVT::i32));
1437 }
Craig Topper48d114b2014-04-26 18:35:24 +00001438 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001439 } else {
1440 if (ValueVT == MVT::i8) {
1441 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1442 }
1443 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
NAKAMURA Takumi18ca09c2013-05-22 06:37:25 +00001444 DAG.getTargetConstant(0, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001445 }
1446
1447 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001448}
1449
/// Map a constant-buffer address space to the base address of its kcache
/// bank: 512 + (kc_bank << 12), i.e. 512 + 4096 * bank.
/// Returns -1 if \p AddressSpace is not one of the constant buffers.
static int
ConstantAddressBlock(unsigned AddressSpace) {
  switch (AddressSpace) {
  case AMDGPUAS::CONSTANT_BUFFER_0:
    return 512;
  case AMDGPUAS::CONSTANT_BUFFER_1:
    return 512 + 4096;
  case AMDGPUAS::CONSTANT_BUFFER_2:
    return 512 + 4096 * 2;
  case AMDGPUAS::CONSTANT_BUFFER_3:
    return 512 + 4096 * 3;
  case AMDGPUAS::CONSTANT_BUFFER_4:
    return 512 + 4096 * 4;
  case AMDGPUAS::CONSTANT_BUFFER_5:
    return 512 + 4096 * 5;
  case AMDGPUAS::CONSTANT_BUFFER_6:
    return 512 + 4096 * 6;
  case AMDGPUAS::CONSTANT_BUFFER_7:
    return 512 + 4096 * 7;
  case AMDGPUAS::CONSTANT_BUFFER_8:
    return 512 + 4096 * 8;
  case AMDGPUAS::CONSTANT_BUFFER_9:
    return 512 + 4096 * 9;
  case AMDGPUAS::CONSTANT_BUFFER_10:
    return 512 + 4096 * 10;
  case AMDGPUAS::CONSTANT_BUFFER_11:
    return 512 + 4096 * 11;
  case AMDGPUAS::CONSTANT_BUFFER_12:
    return 512 + 4096 * 12;
  case AMDGPUAS::CONSTANT_BUFFER_13:
    return 512 + 4096 * 13;
  case AMDGPUAS::CONSTANT_BUFFER_14:
    return 512 + 4096 * 14;
  case AMDGPUAS::CONSTANT_BUFFER_15:
    return 512 + 4096 * 15;
  default:
    return -1;
  }
}
1490
/// Custom lowering for ISD::LOAD on R600.
///
/// Handles, in order:
///  1. Loads the generic AMDGPU lowering already knows how to handle.
///  2. Vector loads from LOCAL (LDS) memory, which are split into scalars.
///  3. Loads from the constant buffers, turned into CONST_ADDRESS nodes
///     (folded to kcache slots when the pointer is a compile-time constant).
///  4. SEXT loads, expanded to a zero-extending load plus shl/sra since the
///     hardware has no native sign-extending load outside CONSTANT_BUFFER_0.
///  5. PRIVATE (stack) loads, lowered to REGISTER_LOAD indirect addressing.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  // First give the generic AMDGPU lowering a chance; if it produces a node,
  // merge it with the chain and we are done.
  SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
  if (Ret.getNode()) {
    SDValue Ops[2] = {
      Ret,
      Chain
    };
    return DAG.getMergeValues(Ops, DL);
  }


  // Vector loads from local memory are not supported directly; split them
  // into per-element loads.
  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      SplitVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Constant-buffer loads.  Only non-extending and zero-extending loads can
  // use the kcache path; SEXT loads fall through to the expansion below.
  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    // If the pointer is a compile-time constant, each of the four channels
    // can be addressed directly as a CONST_ADDRESS slot.
    if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
        isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      // Scalar loads still build a vector here; the extract below picks
      // element 0 back out.
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT,
                           makeArrayRef(Slots, NumElements));
    } else {
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    // Sign extend manually: load as EXTLOAD, then shift left so the sign bit
    // lands in the MSB and arithmetic-shift back down.
    SDValue ShiftAmount =
        DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                  LoadNode->getPointerInfo(), MemVT,
                                  LoadNode->isVolatile(),
                                  LoadNode->isNonTemporal(),
                                  LoadNode->getAlignment());
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);

    SDValue MergedValues[2] = { Sra, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Anything else that is not a private (stack) load is left to the
  // legalizer.
  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    // Emit one REGISTER_LOAD per element; getStackAddress maps the element
    // index onto a (channel, pointer increment) pair for the current stack
    // width.  Unused channels are filled with undef.
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, MVT::i32),
                             Op.getOperand(2));
    }
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2] = {
    LoweredLoad,
    Chain
  };

  return DAG.getMergeValues(Ops, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001639
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
///
/// For compute shaders each argument is loaded from the input constant
/// buffer (CONSTANT_BUFFER_0) past the 36-byte dispatch header; for other
/// shader types the argument already lives in a register.
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      SDLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned ShaderType = MF.getInfo<R600MachineFunctionInfo>()->ShaderType;

  SmallVector<ISD::InputArg, 8> LocalIns;

  // Recover the pre-legalization argument types so each argument's memory
  // type (MemVT below) matches what the frontend declared.
  getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);

  AnalyzeFormalArguments(CCInfo, LocalIns);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    EVT VT = Ins[i].VT;
    EVT MemVT = LocalIns[i].VT;

    // Non-compute shaders receive their inputs in registers.
    if (ShaderType != ShaderType::COMPUTE) {
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
    // being invalid. Somehow this seems to work with i64 arguments, but breaks
    // for <1 x i64>.

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.

    // FIXME: This should really check the extload type, but the handling of
    // extload vector parameters seems to be broken.
    //ISD::LoadExtType Ext = Ins[i].Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
    ISD::LoadExtType Ext = ISD::SEXTLOAD;
    SDValue Arg = DAG.getExtLoad(Ext, DL, VT, Chain,
                                 DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
                                 MachinePointerInfo(UndefValue::get(PtrTy)),
                                 MemVT, false, false, 4);

    // 4 is the preferred alignment for the CONSTANT memory space.
    InVals.push_back(Arg);
  }
  return Chain;
}
1699
Matt Arsenault758659232013-05-18 00:21:46 +00001700EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001701 if (!VT.isVector())
1702 return MVT::i32;
Tom Stellard75aadc22012-12-11 21:25:42 +00001703 return VT.changeVectorElementTypeToInteger();
1704}
1705
Matt Arsenault209a7b92014-04-18 07:40:20 +00001706static SDValue CompactSwizzlableVector(
1707 SelectionDAG &DAG, SDValue VectorEntry,
1708 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001709 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1710 assert(RemapSwizzle.empty());
1711 SDValue NewBldVec[4] = {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001712 VectorEntry.getOperand(0),
1713 VectorEntry.getOperand(1),
1714 VectorEntry.getOperand(2),
1715 VectorEntry.getOperand(3)
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001716 };
1717
1718 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeunefa58a5f2013-10-13 17:56:10 +00001719 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1720 // We mask write here to teach later passes that the ith element of this
1721 // vector is undef. Thus we can use it to reduce 128 bits reg usage,
1722 // break false dependencies and additionnaly make assembly easier to read.
1723 RemapSwizzle[i] = 7; // SEL_MASK_WRITE
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001724 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1725 if (C->isZero()) {
1726 RemapSwizzle[i] = 4; // SEL_0
1727 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1728 } else if (C->isExactlyValue(1.0)) {
1729 RemapSwizzle[i] = 5; // SEL_1
1730 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1731 }
1732 }
1733
1734 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1735 continue;
1736 for (unsigned j = 0; j < i; j++) {
1737 if (NewBldVec[i] == NewBldVec[j]) {
1738 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1739 RemapSwizzle[i] = j;
1740 break;
1741 }
1742 }
1743 }
1744
1745 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001746 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001747}
1748
Benjamin Kramer193960c2013-06-11 13:32:25 +00001749static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1750 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001751 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1752 assert(RemapSwizzle.empty());
1753 SDValue NewBldVec[4] = {
1754 VectorEntry.getOperand(0),
1755 VectorEntry.getOperand(1),
1756 VectorEntry.getOperand(2),
1757 VectorEntry.getOperand(3)
1758 };
1759 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001760 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001761 RemapSwizzle[i] = i;
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001762 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1763 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1764 ->getZExtValue();
1765 if (i == Idx)
1766 isUnmovable[Idx] = true;
1767 }
1768 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001769
1770 for (unsigned i = 0; i < 4; i++) {
1771 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1772 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1773 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001774 if (isUnmovable[Idx])
1775 continue;
1776 // Swap i and Idx
1777 std::swap(NewBldVec[Idx], NewBldVec[i]);
1778 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1779 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001780 }
1781 }
1782
1783 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001784 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001785}
1786
1787
1788SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1789SDValue Swz[4], SelectionDAG &DAG) const {
1790 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1791 // Old -> New swizzle values
1792 DenseMap<unsigned, unsigned> SwizzleRemap;
1793
1794 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1795 for (unsigned i = 0; i < 4; i++) {
1796 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1797 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1798 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1799 }
1800
1801 SwizzleRemap.clear();
1802 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1803 for (unsigned i = 0; i < 4; i++) {
1804 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1805 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1806 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1807 }
1808
1809 return BuildVector;
1810}
1811
1812
Tom Stellard75aadc22012-12-11 21:25:42 +00001813//===----------------------------------------------------------------------===//
1814// Custom DAG Optimizations
1815//===----------------------------------------------------------------------===//
1816
1817SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1818 DAGCombinerInfo &DCI) const {
1819 SelectionDAG &DAG = DCI.DAG;
1820
1821 switch (N->getOpcode()) {
Tom Stellard50122a52014-04-07 19:45:41 +00001822 default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00001823 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1824 case ISD::FP_ROUND: {
1825 SDValue Arg = N->getOperand(0);
1826 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001827 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001828 Arg.getOperand(0));
1829 }
1830 break;
1831 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001832
1833 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1834 // (i32 select_cc f32, f32, -1, 0 cc)
1835 //
1836 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1837 // this to one of the SET*_DX10 instructions.
1838 case ISD::FP_TO_SINT: {
1839 SDValue FNeg = N->getOperand(0);
1840 if (FNeg.getOpcode() != ISD::FNEG) {
1841 return SDValue();
1842 }
1843 SDValue SelectCC = FNeg.getOperand(0);
1844 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1845 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1846 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1847 !isHWTrueValue(SelectCC.getOperand(2)) ||
1848 !isHWFalseValue(SelectCC.getOperand(3))) {
1849 return SDValue();
1850 }
1851
Andrew Trickef9de2a2013-05-25 02:42:55 +00001852 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001853 SelectCC.getOperand(0), // LHS
1854 SelectCC.getOperand(1), // RHS
1855 DAG.getConstant(-1, MVT::i32), // True
1856 DAG.getConstant(0, MVT::i32), // Flase
1857 SelectCC.getOperand(4)); // CC
1858
1859 break;
1860 }
Quentin Colombete2e05482013-07-30 00:27:16 +00001861
NAKAMURA Takumi8a046432013-10-28 04:07:38 +00001862 // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1863 // => build_vector elt0, ... , NewEltIdx, ... , eltN
Quentin Colombete2e05482013-07-30 00:27:16 +00001864 case ISD::INSERT_VECTOR_ELT: {
1865 SDValue InVec = N->getOperand(0);
1866 SDValue InVal = N->getOperand(1);
1867 SDValue EltNo = N->getOperand(2);
1868 SDLoc dl(N);
1869
1870 // If the inserted element is an UNDEF, just use the input vector.
1871 if (InVal.getOpcode() == ISD::UNDEF)
1872 return InVec;
1873
1874 EVT VT = InVec.getValueType();
1875
1876 // If we can't generate a legal BUILD_VECTOR, exit
1877 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1878 return SDValue();
1879
1880 // Check that we know which element is being inserted
1881 if (!isa<ConstantSDNode>(EltNo))
1882 return SDValue();
1883 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1884
1885 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1886 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1887 // vector elements.
1888 SmallVector<SDValue, 8> Ops;
1889 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1890 Ops.append(InVec.getNode()->op_begin(),
1891 InVec.getNode()->op_end());
1892 } else if (InVec.getOpcode() == ISD::UNDEF) {
1893 unsigned NElts = VT.getVectorNumElements();
1894 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1895 } else {
1896 return SDValue();
1897 }
1898
1899 // Insert the element
1900 if (Elt < Ops.size()) {
1901 // All the operands of BUILD_VECTOR must have the same type;
1902 // we enforce that here.
1903 EVT OpVT = Ops[0].getValueType();
1904 if (InVal.getValueType() != OpVT)
1905 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1906 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
1907 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
1908 Ops[Elt] = InVal;
1909 }
1910
1911 // Return the new vector
Craig Topper48d114b2014-04-26 18:35:24 +00001912 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
Quentin Colombete2e05482013-07-30 00:27:16 +00001913 }
1914
Tom Stellard365366f2013-01-23 02:09:06 +00001915 // Extract_vec (Build_vector) generated by custom lowering
1916 // also needs to be customly combined
1917 case ISD::EXTRACT_VECTOR_ELT: {
1918 SDValue Arg = N->getOperand(0);
1919 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1920 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1921 unsigned Element = Const->getZExtValue();
1922 return Arg->getOperand(Element);
1923 }
1924 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001925 if (Arg.getOpcode() == ISD::BITCAST &&
1926 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1927 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1928 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001929 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00001930 Arg->getOperand(0).getOperand(Element));
1931 }
1932 }
Tom Stellard365366f2013-01-23 02:09:06 +00001933 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001934
1935 case ISD::SELECT_CC: {
Tom Stellardafa8b532014-05-09 16:42:16 +00001936 // Try common optimizations
1937 SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
1938 if (Ret.getNode())
1939 return Ret;
1940
Tom Stellarde06163a2013-02-07 14:02:35 +00001941 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1942 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001943 //
1944 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1945 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001946 SDValue LHS = N->getOperand(0);
1947 if (LHS.getOpcode() != ISD::SELECT_CC) {
1948 return SDValue();
1949 }
1950
1951 SDValue RHS = N->getOperand(1);
1952 SDValue True = N->getOperand(2);
1953 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001954 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001955
1956 if (LHS.getOperand(2).getNode() != True.getNode() ||
1957 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00001958 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00001959 return SDValue();
1960 }
1961
Tom Stellard5e524892013-03-08 15:37:11 +00001962 switch (NCC) {
1963 default: return SDValue();
1964 case ISD::SETNE: return LHS;
1965 case ISD::SETEQ: {
1966 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1967 LHSCC = ISD::getSetCCInverse(LHSCC,
1968 LHS.getOperand(0).getValueType().isInteger());
Tom Stellardcd428182013-09-28 02:50:38 +00001969 if (DCI.isBeforeLegalizeOps() ||
1970 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1971 return DAG.getSelectCC(SDLoc(N),
1972 LHS.getOperand(0),
1973 LHS.getOperand(1),
1974 LHS.getOperand(2),
1975 LHS.getOperand(3),
1976 LHSCC);
1977 break;
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001978 }
Tom Stellard5e524892013-03-08 15:37:11 +00001979 }
Tom Stellardcd428182013-09-28 02:50:38 +00001980 return SDValue();
Tom Stellard5e524892013-03-08 15:37:11 +00001981 }
Tom Stellardfbab8272013-08-16 01:12:11 +00001982
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001983 case AMDGPUISD::EXPORT: {
1984 SDValue Arg = N->getOperand(1);
1985 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1986 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001987
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001988 SDValue NewArgs[8] = {
1989 N->getOperand(0), // Chain
1990 SDValue(),
1991 N->getOperand(2), // ArrayBase
1992 N->getOperand(3), // Type
1993 N->getOperand(4), // SWZ_X
1994 N->getOperand(5), // SWZ_Y
1995 N->getOperand(6), // SWZ_Z
1996 N->getOperand(7) // SWZ_W
1997 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00001998 SDLoc DL(N);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001999 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
Craig Topper48d114b2014-04-26 18:35:24 +00002000 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
Tom Stellarde06163a2013-02-07 14:02:35 +00002001 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002002 case AMDGPUISD::TEXTURE_FETCH: {
2003 SDValue Arg = N->getOperand(1);
2004 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2005 break;
2006
2007 SDValue NewArgs[19] = {
2008 N->getOperand(0),
2009 N->getOperand(1),
2010 N->getOperand(2),
2011 N->getOperand(3),
2012 N->getOperand(4),
2013 N->getOperand(5),
2014 N->getOperand(6),
2015 N->getOperand(7),
2016 N->getOperand(8),
2017 N->getOperand(9),
2018 N->getOperand(10),
2019 N->getOperand(11),
2020 N->getOperand(12),
2021 N->getOperand(13),
2022 N->getOperand(14),
2023 N->getOperand(15),
2024 N->getOperand(16),
2025 N->getOperand(17),
2026 N->getOperand(18),
2027 };
2028 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
2029 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
Craig Topper48d114b2014-04-26 18:35:24 +00002030 NewArgs);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002031 }
Tom Stellard75aadc22012-12-11 21:25:42 +00002032 }
Matt Arsenault5565f65e2014-05-22 18:09:07 +00002033
2034 return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00002035}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002036
2037static bool
2038FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002039 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002040 const R600InstrInfo *TII =
2041 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
2042 if (!Src.isMachineOpcode())
2043 return false;
2044 switch (Src.getMachineOpcode()) {
2045 case AMDGPU::FNEG_R600:
2046 if (!Neg.getNode())
2047 return false;
2048 Src = Src.getOperand(0);
2049 Neg = DAG.getTargetConstant(1, MVT::i32);
2050 return true;
2051 case AMDGPU::FABS_R600:
2052 if (!Abs.getNode())
2053 return false;
2054 Src = Src.getOperand(0);
2055 Abs = DAG.getTargetConstant(1, MVT::i32);
2056 return true;
2057 case AMDGPU::CONST_COPY: {
2058 unsigned Opcode = ParentNode->getMachineOpcode();
2059 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2060
2061 if (!Sel.getNode())
2062 return false;
2063
2064 SDValue CstOffset = Src.getOperand(0);
2065 if (ParentNode->getValueType(0).isVector())
2066 return false;
2067
2068 // Gather constants values
2069 int SrcIndices[] = {
2070 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2071 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2072 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
2073 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2074 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2075 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2076 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2077 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2078 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2079 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2080 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
2081 };
2082 std::vector<unsigned> Consts;
Matt Arsenault4d64f962014-05-12 19:23:21 +00002083 for (int OtherSrcIdx : SrcIndices) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002084 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2085 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2086 continue;
2087 if (HasDst) {
2088 OtherSrcIdx--;
2089 OtherSelIdx--;
2090 }
2091 if (RegisterSDNode *Reg =
2092 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2093 if (Reg->getReg() == AMDGPU::ALU_CONST) {
Matt Arsenaultb3ee3882014-05-12 19:26:38 +00002094 ConstantSDNode *Cst
2095 = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002096 Consts.push_back(Cst->getZExtValue());
2097 }
2098 }
2099 }
2100
Matt Arsenault37c12d72014-05-12 20:42:57 +00002101 ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002102 Consts.push_back(Cst->getZExtValue());
2103 if (!TII->fitsConstReadLimitations(Consts)) {
2104 return false;
2105 }
2106
2107 Sel = CstOffset;
2108 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
2109 return true;
2110 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002111 case AMDGPU::MOV_IMM_I32:
2112 case AMDGPU::MOV_IMM_F32: {
2113 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
2114 uint64_t ImmValue = 0;
2115
2116
2117 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
2118 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
2119 float FloatValue = FPC->getValueAPF().convertToFloat();
2120 if (FloatValue == 0.0) {
2121 ImmReg = AMDGPU::ZERO;
2122 } else if (FloatValue == 0.5) {
2123 ImmReg = AMDGPU::HALF;
2124 } else if (FloatValue == 1.0) {
2125 ImmReg = AMDGPU::ONE;
2126 } else {
2127 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2128 }
2129 } else {
2130 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
2131 uint64_t Value = C->getZExtValue();
2132 if (Value == 0) {
2133 ImmReg = AMDGPU::ZERO;
2134 } else if (Value == 1) {
2135 ImmReg = AMDGPU::ONE_INT;
2136 } else {
2137 ImmValue = Value;
2138 }
2139 }
2140
2141 // Check that we aren't already using an immediate.
2142 // XXX: It's possible for an instruction to have more than one
2143 // immediate operand, but this is not supported yet.
2144 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
2145 if (!Imm.getNode())
2146 return false;
2147 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
2148 assert(C);
2149 if (C->getZExtValue())
2150 return false;
2151 Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
2152 }
2153 Src = DAG.getRegister(ImmReg, MVT::i32);
2154 return true;
2155 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002156 default:
2157 return false;
2158 }
2159}
2160
2161
/// \brief Fold the instructions after selecting them.
///
/// Walks the just-selected machine node and tries to fold constants /
/// register-selects into the source operands via FoldOperand(), and to fold
/// CLAMP_R600 pseudo nodes into the clamp modifier bit of their input
/// instruction. Returns a freshly built machine node when anything was
/// folded, otherwise returns \p Node unchanged.
SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
                                            SelectionDAG &DAG) const {
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
  // Only machine nodes carry the named-operand tables we query below.
  if (!Node->isMachineOpcode())
    return Node;
  unsigned Opcode = Node->getMachineOpcode();
  // Placeholder passed to FoldOperand() for modifier slots an opcode or
  // call site does not have (e.g. REG_SEQUENCE has no neg/abs/sel).
  SDValue FakeOp;

  // Mutable copy of the node's operand list. FoldOperand() rewrites entries
  // through the references taken below; on success we rebuild the node from
  // this vector.
  std::vector<SDValue> Ops;
  for(SDNode::op_iterator I = Node->op_begin(), E = Node->op_end();
      I != E; ++I)
    Ops.push_back(*I);

  if (Opcode == AMDGPU::DOT_4) {
    // DOT_4 reads two 4-component sources; each of the 8 scalar slots has
    // its own src/neg/abs operands, looked up by name per channel.
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
    };
    for (unsigned i = 0; i < 8; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      // getOperandIdx() returns MachineInstr operand indices, which count
      // the dst def at index 0; the SDNode operand list does not, hence the
      // "- 1" when indexing into Ops. (The HasDst check below applies the
      // same correction to the sel index.)
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue &Abs = Ops[AbsIdx[i] - 1];
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      if (HasDst)
        SelIdx--;
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      // Stop at the first successful fold and rebuild the node from the
      // (mutated) operand vector; this function runs until fixpoint via
      // repeated invocation by the caller.
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::REG_SEQUENCE) {
    // REG_SEQUENCE operands alternate (reg, subreg-index) starting at 1
    // (operand 0 is the register class); only the reg slots are foldable.
    for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
      SDValue &Src = Ops[i];
      if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::CLAMP_R600) {
    // Fold the CLAMP pseudo into the defining instruction by setting that
    // instruction's clamp modifier operand to 1, replacing this node with a
    // clone of the source instruction.
    SDValue Src = Node->getOperand(0);
    if (!Src.isMachineOpcode() ||
        !TII->hasInstrModifiers(Src.getMachineOpcode()))
      return Node;
    int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
        AMDGPU::OpName::clamp);
    if (ClampIdx < 0)
      return Node;
    // NOTE: this local Ops intentionally shadows the outer one — it holds
    // the *source* instruction's operands, not CLAMP_R600's.
    std::vector<SDValue> Ops;
    unsigned NumOp = Src.getNumOperands();
    for(unsigned i = 0; i < NumOp; ++i)
          Ops.push_back(Src.getOperand(i));
    // "- 1" again converts the MI operand index (which includes the def)
    // into an SDNode operand index.
    Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
    return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
        Node->getVTList(), Ops);
  } else {
    // Generic single-slot ALU instruction: up to three sources, each with
    // neg, and src0/src1 additionally with abs (src2 has no abs modifier,
    // hence the -1 sentinel below).
    if (!TII->hasInstrModifiers(Opcode))
      return Node;
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
      -1
    };
    for (unsigned i = 0; i < 3; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue FakeAbs;
      SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
      if (HasDst) {
        SelIdx--;
        ImmIdx--;
      }
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      // NOTE(review): unlike Sel, ImmIdx is not range-checked before
      // indexing — this assumes every opcode with instruction modifiers
      // also has a 'literal' operand (ImmIdx >= 0 here). TODO: confirm
      // against the R600 instruction definitions.
      SDValue &Imm = Ops[ImmIdx];
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  }

  // Nothing folded; keep the original node.
  return Node;
}