blob: 87610e9cc559c8c79c5e4a1a8d61b740754e0fee [file] [log] [blame]
//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000016#include "AMDGPUFrameLowering.h"
Matt Arsenaultc791f392014-06-23 18:00:31 +000017#include "AMDGPUIntrinsicInfo.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000018#include "AMDGPUSubtarget.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000019#include "R600Defines.h"
20#include "R600InstrInfo.h"
21#include "R600MachineFunctionInfo.h"
Tom Stellard067c8152014-07-21 14:01:14 +000022#include "llvm/Analysis/ValueTracking.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000023#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000024#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000025#include "llvm/CodeGen/MachineInstrBuilder.h"
26#include "llvm/CodeGen/MachineRegisterInfo.h"
27#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000028#include "llvm/IR/Argument.h"
29#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000030
31using namespace llvm;
32
33R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000034 AMDGPUTargetLowering(TM),
35 Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000036 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
37 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
38 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
39 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000040 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
41 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
42
Tom Stellard75aadc22012-12-11 21:25:42 +000043 computeRegisterProperties();
44
Tom Stellard0351ea22013-09-28 02:50:50 +000045 // Set condition code actions
46 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
47 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000048 setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000049 setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000050 setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
51 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000052 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
53 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
54 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
55 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000056 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
57 setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
58
59 setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
60 setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
61 setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
62 setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
63
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000064 setOperationAction(ISD::FCOS, MVT::f32, Custom);
65 setOperationAction(ISD::FSIN, MVT::f32, Custom);
66
Tom Stellard75aadc22012-12-11 21:25:42 +000067 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000068 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000069
Tom Stellard492ebea2013-03-08 15:37:07 +000070 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
71 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Matt Arsenault1d555c42014-06-23 18:00:55 +000072 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000073
74 setOperationAction(ISD::FSUB, MVT::f32, Expand);
75
76 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
77 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
78 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000079
Tom Stellard75aadc22012-12-11 21:25:42 +000080 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
81 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
82
Tom Stellarde8f9f282013-03-08 15:37:05 +000083 setOperationAction(ISD::SETCC, MVT::i32, Expand);
84 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000085 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
Jan Vesely2cb62ce2014-07-10 22:40:21 +000086 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
87 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000088
Tom Stellard53f2f902013-09-05 18:38:03 +000089 setOperationAction(ISD::SELECT, MVT::i32, Expand);
90 setOperationAction(ISD::SELECT, MVT::f32, Expand);
91 setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
Tom Stellard53f2f902013-09-05 18:38:03 +000092 setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000093
Matt Arsenault4e466652014-04-16 01:41:30 +000094 // Expand sign extension of vectors
95 if (!Subtarget->hasBFE())
96 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
97
98 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
99 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);
100
101 if (!Subtarget->hasBFE())
102 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
103 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
104 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);
105
106 if (!Subtarget->hasBFE())
107 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
108 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
109 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);
110
111 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
112 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
113 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);
114
115 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
116
117
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000118 // Legalize loads and stores to the private address space.
119 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000120 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000121 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000122
123 // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
124 // spaces, so it is custom lowered to handle those where it isn't.
Tom Stellard1e803092013-07-23 01:48:18 +0000125 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
126 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
127 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
128 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000129 setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
130 setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
131
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000132 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000133 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000134 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000135 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
Tom Stellardd3ee8c12013-08-16 01:12:06 +0000136 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
137 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000138
Tom Stellard365366f2013-01-23 02:09:06 +0000139 setOperationAction(ISD::LOAD, MVT::i32, Custom);
140 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000141 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
142
Tom Stellard880a80a2014-06-17 16:53:14 +0000143 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
144 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
145 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
146 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
147
148 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
149 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
150 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
151 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
152
Tom Stellard75aadc22012-12-11 21:25:42 +0000153 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +0000154 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +0000155 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +0000156 setTargetDAGCombine(ISD::SELECT_CC);
Quentin Colombete2e05482013-07-30 00:27:16 +0000157 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000158
Matt Arsenaultb8b51532014-06-23 18:00:38 +0000159 setOperationAction(ISD::SUB, MVT::i64, Expand);
160
Tom Stellard5f337882014-04-29 23:12:43 +0000161 // These should be replaced by UDVIREM, but it does not happen automatically
162 // during Type Legalization
163 setOperationAction(ISD::UDIV, MVT::i64, Custom);
164 setOperationAction(ISD::UREM, MVT::i64, Custom);
Jan Vesely343cd6f02014-06-22 21:43:01 +0000165 setOperationAction(ISD::SDIV, MVT::i64, Custom);
166 setOperationAction(ISD::SREM, MVT::i64, Custom);
Tom Stellard5f337882014-04-29 23:12:43 +0000167
Jan Vesely25f36272014-06-18 12:27:13 +0000168 // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
169 // to be Legal/Custom in order to avoid library calls.
170 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
Jan Vesely900ff2e2014-06-18 12:27:15 +0000171 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
Jan Veselyecf51332014-06-18 12:27:17 +0000172 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
Jan Vesely25f36272014-06-18 12:27:13 +0000173
Michel Danzer49812b52013-07-10 16:37:07 +0000174 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
175
Matt Arsenaultc4d3d3a2014-06-23 18:00:49 +0000176 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
177 for (MVT VT : ScalarIntVTs) {
178 setOperationAction(ISD::ADDC, VT, Expand);
179 setOperationAction(ISD::SUBC, VT, Expand);
180 setOperationAction(ISD::ADDE, VT, Expand);
181 setOperationAction(ISD::SUBE, VT, Expand);
182 }
183
Tom Stellardb852af52013-03-08 15:37:03 +0000184 setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard87047f62013-04-24 23:56:18 +0000185 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
Tom Stellardfc455472013-08-12 22:33:21 +0000186 setSchedulingPreference(Sched::Source);
Tom Stellard75aadc22012-12-11 21:25:42 +0000187}
188
189MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
190 MachineInstr * MI, MachineBasicBlock * BB) const {
191 MachineFunction * MF = BB->getParent();
192 MachineRegisterInfo &MRI = MF->getRegInfo();
193 MachineBasicBlock::iterator I = *MI;
Eric Christopherfc6de422014-08-05 02:39:49 +0000194 const R600InstrInfo *TII =
195 static_cast<const R600InstrInfo *>(MF->getSubtarget().getInstrInfo());
Tom Stellard75aadc22012-12-11 21:25:42 +0000196
197 switch (MI->getOpcode()) {
Tom Stellardc6f4a292013-08-26 15:05:59 +0000198 default:
Tom Stellard8f9fc202013-11-15 00:12:45 +0000199 // Replace LDS_*_RET instruction that don't have any uses with the
200 // equivalent LDS_*_NORET instruction.
201 if (TII->isLDSRetInstr(MI->getOpcode())) {
Tom Stellard13c68ef2013-09-05 18:38:09 +0000202 int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
203 assert(DstIdx != -1);
204 MachineInstrBuilder NewMI;
Aaron Watry1885e532014-09-11 15:02:54 +0000205 // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
206 // LDS_1A2D support and remove this special case.
207 if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()) ||
208 MI->getOpcode() == AMDGPU::LDS_CMPST_RET)
Tom Stellard8f9fc202013-11-15 00:12:45 +0000209 return BB;
210
211 NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
212 TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
Tom Stellardc6f4a292013-08-26 15:05:59 +0000213 for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
214 NewMI.addOperand(MI->getOperand(i));
215 }
Tom Stellardc6f4a292013-08-26 15:05:59 +0000216 } else {
217 return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
218 }
219 break;
Tom Stellard75aadc22012-12-11 21:25:42 +0000220 case AMDGPU::CLAMP_R600: {
221 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
222 AMDGPU::MOV,
223 MI->getOperand(0).getReg(),
224 MI->getOperand(1).getReg());
225 TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
226 break;
227 }
228
229 case AMDGPU::FABS_R600: {
230 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
231 AMDGPU::MOV,
232 MI->getOperand(0).getReg(),
233 MI->getOperand(1).getReg());
234 TII->addFlag(NewMI, 0, MO_FLAG_ABS);
235 break;
236 }
237
238 case AMDGPU::FNEG_R600: {
239 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
240 AMDGPU::MOV,
241 MI->getOperand(0).getReg(),
242 MI->getOperand(1).getReg());
243 TII->addFlag(NewMI, 0, MO_FLAG_NEG);
244 break;
245 }
246
Tom Stellard75aadc22012-12-11 21:25:42 +0000247 case AMDGPU::MASK_WRITE: {
248 unsigned maskedRegister = MI->getOperand(0).getReg();
249 assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
250 MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
251 TII->addFlag(defInstr, 0, MO_FLAG_MASK);
252 break;
253 }
254
255 case AMDGPU::MOV_IMM_F32:
256 TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
257 MI->getOperand(1).getFPImm()->getValueAPF()
258 .bitcastToAPInt().getZExtValue());
259 break;
260 case AMDGPU::MOV_IMM_I32:
261 TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
262 MI->getOperand(1).getImm());
263 break;
Vincent Lejeune0b72f102013-03-05 15:04:55 +0000264 case AMDGPU::CONST_COPY: {
265 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
266 MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
Tom Stellard02661d92013-06-25 21:22:18 +0000267 TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
Vincent Lejeune0b72f102013-03-05 15:04:55 +0000268 MI->getOperand(1).getImm());
269 break;
270 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000271
272 case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
Tom Stellard0344cdf2013-08-01 15:23:42 +0000273 case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
Tom Stellard75aadc22012-12-11 21:25:42 +0000274 case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +0000275 unsigned EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
Tom Stellard75aadc22012-12-11 21:25:42 +0000276
277 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
278 .addOperand(MI->getOperand(0))
279 .addOperand(MI->getOperand(1))
280 .addImm(EOP); // Set End of program bit
281 break;
282 }
283
Tom Stellard75aadc22012-12-11 21:25:42 +0000284 case AMDGPU::TXD: {
285 unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
286 unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000287 MachineOperand &RID = MI->getOperand(4);
288 MachineOperand &SID = MI->getOperand(5);
289 unsigned TextureId = MI->getOperand(6).getImm();
290 unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
291 unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
Tom Stellard75aadc22012-12-11 21:25:42 +0000292
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000293 switch (TextureId) {
294 case 5: // Rect
295 CTX = CTY = 0;
296 break;
297 case 6: // Shadow1D
298 SrcW = SrcZ;
299 break;
300 case 7: // Shadow2D
301 SrcW = SrcZ;
302 break;
303 case 8: // ShadowRect
304 CTX = CTY = 0;
305 SrcW = SrcZ;
306 break;
307 case 9: // 1DArray
308 SrcZ = SrcY;
309 CTZ = 0;
310 break;
311 case 10: // 2DArray
312 CTZ = 0;
313 break;
314 case 11: // Shadow1DArray
315 SrcZ = SrcY;
316 CTZ = 0;
317 break;
318 case 12: // Shadow2DArray
319 CTZ = 0;
320 break;
321 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000322 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
323 .addOperand(MI->getOperand(3))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000324 .addImm(SrcX)
325 .addImm(SrcY)
326 .addImm(SrcZ)
327 .addImm(SrcW)
328 .addImm(0)
329 .addImm(0)
330 .addImm(0)
331 .addImm(0)
332 .addImm(1)
333 .addImm(2)
334 .addImm(3)
335 .addOperand(RID)
336 .addOperand(SID)
337 .addImm(CTX)
338 .addImm(CTY)
339 .addImm(CTZ)
340 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000341 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
342 .addOperand(MI->getOperand(2))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000343 .addImm(SrcX)
344 .addImm(SrcY)
345 .addImm(SrcZ)
346 .addImm(SrcW)
347 .addImm(0)
348 .addImm(0)
349 .addImm(0)
350 .addImm(0)
351 .addImm(1)
352 .addImm(2)
353 .addImm(3)
354 .addOperand(RID)
355 .addOperand(SID)
356 .addImm(CTX)
357 .addImm(CTY)
358 .addImm(CTZ)
359 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000360 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
361 .addOperand(MI->getOperand(0))
362 .addOperand(MI->getOperand(1))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000363 .addImm(SrcX)
364 .addImm(SrcY)
365 .addImm(SrcZ)
366 .addImm(SrcW)
367 .addImm(0)
368 .addImm(0)
369 .addImm(0)
370 .addImm(0)
371 .addImm(1)
372 .addImm(2)
373 .addImm(3)
374 .addOperand(RID)
375 .addOperand(SID)
376 .addImm(CTX)
377 .addImm(CTY)
378 .addImm(CTZ)
379 .addImm(CTW)
Tom Stellard75aadc22012-12-11 21:25:42 +0000380 .addReg(T0, RegState::Implicit)
381 .addReg(T1, RegState::Implicit);
382 break;
383 }
384
385 case AMDGPU::TXD_SHADOW: {
386 unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
387 unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000388 MachineOperand &RID = MI->getOperand(4);
389 MachineOperand &SID = MI->getOperand(5);
390 unsigned TextureId = MI->getOperand(6).getImm();
391 unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
392 unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
393
394 switch (TextureId) {
395 case 5: // Rect
396 CTX = CTY = 0;
397 break;
398 case 6: // Shadow1D
399 SrcW = SrcZ;
400 break;
401 case 7: // Shadow2D
402 SrcW = SrcZ;
403 break;
404 case 8: // ShadowRect
405 CTX = CTY = 0;
406 SrcW = SrcZ;
407 break;
408 case 9: // 1DArray
409 SrcZ = SrcY;
410 CTZ = 0;
411 break;
412 case 10: // 2DArray
413 CTZ = 0;
414 break;
415 case 11: // Shadow1DArray
416 SrcZ = SrcY;
417 CTZ = 0;
418 break;
419 case 12: // Shadow2DArray
420 CTZ = 0;
421 break;
422 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000423
424 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
425 .addOperand(MI->getOperand(3))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000426 .addImm(SrcX)
427 .addImm(SrcY)
428 .addImm(SrcZ)
429 .addImm(SrcW)
430 .addImm(0)
431 .addImm(0)
432 .addImm(0)
433 .addImm(0)
434 .addImm(1)
435 .addImm(2)
436 .addImm(3)
437 .addOperand(RID)
438 .addOperand(SID)
439 .addImm(CTX)
440 .addImm(CTY)
441 .addImm(CTZ)
442 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000443 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
444 .addOperand(MI->getOperand(2))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000445 .addImm(SrcX)
446 .addImm(SrcY)
447 .addImm(SrcZ)
448 .addImm(SrcW)
449 .addImm(0)
450 .addImm(0)
451 .addImm(0)
452 .addImm(0)
453 .addImm(1)
454 .addImm(2)
455 .addImm(3)
456 .addOperand(RID)
457 .addOperand(SID)
458 .addImm(CTX)
459 .addImm(CTY)
460 .addImm(CTZ)
461 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000462 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
463 .addOperand(MI->getOperand(0))
464 .addOperand(MI->getOperand(1))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000465 .addImm(SrcX)
466 .addImm(SrcY)
467 .addImm(SrcZ)
468 .addImm(SrcW)
469 .addImm(0)
470 .addImm(0)
471 .addImm(0)
472 .addImm(0)
473 .addImm(1)
474 .addImm(2)
475 .addImm(3)
476 .addOperand(RID)
477 .addOperand(SID)
478 .addImm(CTX)
479 .addImm(CTY)
480 .addImm(CTZ)
481 .addImm(CTW)
Tom Stellard75aadc22012-12-11 21:25:42 +0000482 .addReg(T0, RegState::Implicit)
483 .addReg(T1, RegState::Implicit);
484 break;
485 }
486
487 case AMDGPU::BRANCH:
488 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000489 .addOperand(MI->getOperand(0));
Tom Stellard75aadc22012-12-11 21:25:42 +0000490 break;
491
492 case AMDGPU::BRANCH_COND_f32: {
493 MachineInstr *NewMI =
494 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
495 AMDGPU::PREDICATE_BIT)
496 .addOperand(MI->getOperand(1))
497 .addImm(OPCODE_IS_NOT_ZERO)
498 .addImm(0); // Flags
499 TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000500 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
Tom Stellard75aadc22012-12-11 21:25:42 +0000501 .addOperand(MI->getOperand(0))
502 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
503 break;
504 }
505
506 case AMDGPU::BRANCH_COND_i32: {
507 MachineInstr *NewMI =
508 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
509 AMDGPU::PREDICATE_BIT)
510 .addOperand(MI->getOperand(1))
511 .addImm(OPCODE_IS_NOT_ZERO_INT)
512 .addImm(0); // Flags
513 TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000514 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
Tom Stellard75aadc22012-12-11 21:25:42 +0000515 .addOperand(MI->getOperand(0))
516 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
517 break;
518 }
519
Tom Stellard75aadc22012-12-11 21:25:42 +0000520 case AMDGPU::EG_ExportSwz:
521 case AMDGPU::R600_ExportSwz: {
Tom Stellard6f1b8652013-01-23 21:39:49 +0000522 // Instruction is left unmodified if its not the last one of its type
523 bool isLastInstructionOfItsType = true;
524 unsigned InstExportType = MI->getOperand(1).getImm();
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +0000525 for (MachineBasicBlock::iterator NextExportInst = std::next(I),
Tom Stellard6f1b8652013-01-23 21:39:49 +0000526 EndBlock = BB->end(); NextExportInst != EndBlock;
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +0000527 NextExportInst = std::next(NextExportInst)) {
Tom Stellard6f1b8652013-01-23 21:39:49 +0000528 if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
529 NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
530 unsigned CurrentInstExportType = NextExportInst->getOperand(1)
531 .getImm();
532 if (CurrentInstExportType == InstExportType) {
533 isLastInstructionOfItsType = false;
534 break;
535 }
536 }
537 }
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +0000538 bool EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
Tom Stellard6f1b8652013-01-23 21:39:49 +0000539 if (!EOP && !isLastInstructionOfItsType)
Tom Stellard75aadc22012-12-11 21:25:42 +0000540 return BB;
541 unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
542 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
543 .addOperand(MI->getOperand(0))
544 .addOperand(MI->getOperand(1))
545 .addOperand(MI->getOperand(2))
546 .addOperand(MI->getOperand(3))
547 .addOperand(MI->getOperand(4))
548 .addOperand(MI->getOperand(5))
549 .addOperand(MI->getOperand(6))
550 .addImm(CfInst)
Tom Stellard6f1b8652013-01-23 21:39:49 +0000551 .addImm(EOP);
Tom Stellard75aadc22012-12-11 21:25:42 +0000552 break;
553 }
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000554 case AMDGPU::RETURN: {
555 // RETURN instructions must have the live-out registers as implicit uses,
556 // otherwise they appear dead.
557 R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
558 MachineInstrBuilder MIB(*MF, MI);
559 for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
560 MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
561 return BB;
562 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000563 }
564
565 MI->eraseFromParent();
566 return BB;
567}
568
//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//
572
Tom Stellard75aadc22012-12-11 21:25:42 +0000573SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000574 MachineFunction &MF = DAG.getMachineFunction();
575 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard75aadc22012-12-11 21:25:42 +0000576 switch (Op.getOpcode()) {
577 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Tom Stellard880a80a2014-06-17 16:53:14 +0000578 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
579 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
Jan Vesely25f36272014-06-18 12:27:13 +0000580 case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
Jan Veselyecf51332014-06-18 12:27:17 +0000581 case ISD::SRA_PARTS:
Jan Vesely900ff2e2014-06-18 12:27:15 +0000582 case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000583 case ISD::FCOS:
584 case ISD::FSIN: return LowerTrig(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000585 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000586 case ISD::STORE: return LowerSTORE(Op, DAG);
Matt Arsenaultd2c9e082014-07-07 18:34:45 +0000587 case ISD::LOAD: {
588 SDValue Result = LowerLOAD(Op, DAG);
589 assert((!Result.getNode() ||
590 Result.getNode()->getNumValues() == 2) &&
591 "Load should return a value and a chain");
592 return Result;
593 }
594
Matt Arsenault1d555c42014-06-23 18:00:55 +0000595 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000596 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000597 case ISD::INTRINSIC_VOID: {
598 SDValue Chain = Op.getOperand(0);
599 unsigned IntrinsicID =
600 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
601 switch (IntrinsicID) {
602 case AMDGPUIntrinsic::AMDGPU_store_output: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000603 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
604 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000605 MFI->LiveOuts.push_back(Reg);
Andrew Trickef9de2a2013-05-25 02:42:55 +0000606 return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
Tom Stellard75aadc22012-12-11 21:25:42 +0000607 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000608 case AMDGPUIntrinsic::R600_store_swizzle: {
609 const SDValue Args[8] = {
610 Chain,
611 Op.getOperand(2), // Export Value
612 Op.getOperand(3), // ArrayBase
613 Op.getOperand(4), // Type
614 DAG.getConstant(0, MVT::i32), // SWZ_X
615 DAG.getConstant(1, MVT::i32), // SWZ_Y
616 DAG.getConstant(2, MVT::i32), // SWZ_Z
617 DAG.getConstant(3, MVT::i32) // SWZ_W
618 };
Craig Topper48d114b2014-04-26 18:35:24 +0000619 return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(), Args);
Tom Stellard75aadc22012-12-11 21:25:42 +0000620 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000621
Tom Stellard75aadc22012-12-11 21:25:42 +0000622 // default for switch(IntrinsicID)
623 default: break;
624 }
625 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
626 break;
627 }
628 case ISD::INTRINSIC_WO_CHAIN: {
629 unsigned IntrinsicID =
630 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
631 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000632 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000633 switch(IntrinsicID) {
634 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000635 case AMDGPUIntrinsic::R600_load_input: {
636 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
637 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
638 MachineFunction &MF = DAG.getMachineFunction();
639 MachineRegisterInfo &MRI = MF.getRegInfo();
640 MRI.addLiveIn(Reg);
641 return DAG.getCopyFromReg(DAG.getEntryNode(),
642 SDLoc(DAG.getEntryNode()), Reg, VT);
643 }
644
645 case AMDGPUIntrinsic::R600_interp_input: {
646 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
647 int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
648 MachineSDNode *interp;
649 if (ijb < 0) {
650 const MachineFunction &MF = DAG.getMachineFunction();
Eric Christopherd9134482014-08-04 21:25:23 +0000651 const R600InstrInfo *TII = static_cast<const R600InstrInfo *>(
Eric Christopherfc6de422014-08-05 02:39:49 +0000652 MF.getSubtarget().getInstrInfo());
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000653 interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
654 MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
655 return DAG.getTargetExtractSubreg(
656 TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
657 DL, MVT::f32, SDValue(interp, 0));
658 }
659 MachineFunction &MF = DAG.getMachineFunction();
660 MachineRegisterInfo &MRI = MF.getRegInfo();
661 unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
662 unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
663 MRI.addLiveIn(RegisterI);
664 MRI.addLiveIn(RegisterJ);
665 SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
666 SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
667 SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
668 SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);
669
670 if (slot % 4 < 2)
671 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
672 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
673 RegisterJNode, RegisterINode);
674 else
675 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
676 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
677 RegisterJNode, RegisterINode);
678 return SDValue(interp, slot % 2);
679 }
Vincent Lejeunef143af32013-11-11 22:10:24 +0000680 case AMDGPUIntrinsic::R600_interp_xy:
681 case AMDGPUIntrinsic::R600_interp_zw: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000682 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Tom Stellard41afe6a2013-02-05 17:09:14 +0000683 MachineSDNode *interp;
Vincent Lejeunef143af32013-11-11 22:10:24 +0000684 SDValue RegisterINode = Op.getOperand(2);
685 SDValue RegisterJNode = Op.getOperand(3);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000686
Vincent Lejeunef143af32013-11-11 22:10:24 +0000687 if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
Tom Stellard41afe6a2013-02-05 17:09:14 +0000688 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
Vincent Lejeunef143af32013-11-11 22:10:24 +0000689 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000690 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000691 else
692 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
Vincent Lejeunef143af32013-11-11 22:10:24 +0000693 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000694 RegisterJNode, RegisterINode);
Vincent Lejeunef143af32013-11-11 22:10:24 +0000695 return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
696 SDValue(interp, 0), SDValue(interp, 1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000697 }
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000698 case AMDGPUIntrinsic::R600_tex:
699 case AMDGPUIntrinsic::R600_texc:
700 case AMDGPUIntrinsic::R600_txl:
701 case AMDGPUIntrinsic::R600_txlc:
702 case AMDGPUIntrinsic::R600_txb:
703 case AMDGPUIntrinsic::R600_txbc:
704 case AMDGPUIntrinsic::R600_txf:
705 case AMDGPUIntrinsic::R600_txq:
706 case AMDGPUIntrinsic::R600_ddx:
Vincent Lejeune6df39432013-10-02 16:00:33 +0000707 case AMDGPUIntrinsic::R600_ddy:
708 case AMDGPUIntrinsic::R600_ldptr: {
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000709 unsigned TextureOp;
710 switch (IntrinsicID) {
711 case AMDGPUIntrinsic::R600_tex:
712 TextureOp = 0;
713 break;
714 case AMDGPUIntrinsic::R600_texc:
715 TextureOp = 1;
716 break;
717 case AMDGPUIntrinsic::R600_txl:
718 TextureOp = 2;
719 break;
720 case AMDGPUIntrinsic::R600_txlc:
721 TextureOp = 3;
722 break;
723 case AMDGPUIntrinsic::R600_txb:
724 TextureOp = 4;
725 break;
726 case AMDGPUIntrinsic::R600_txbc:
727 TextureOp = 5;
728 break;
729 case AMDGPUIntrinsic::R600_txf:
730 TextureOp = 6;
731 break;
732 case AMDGPUIntrinsic::R600_txq:
733 TextureOp = 7;
734 break;
735 case AMDGPUIntrinsic::R600_ddx:
736 TextureOp = 8;
737 break;
738 case AMDGPUIntrinsic::R600_ddy:
739 TextureOp = 9;
740 break;
Vincent Lejeune6df39432013-10-02 16:00:33 +0000741 case AMDGPUIntrinsic::R600_ldptr:
742 TextureOp = 10;
743 break;
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000744 default:
745 llvm_unreachable("Unknow Texture Operation");
746 }
747
748 SDValue TexArgs[19] = {
749 DAG.getConstant(TextureOp, MVT::i32),
750 Op.getOperand(1),
751 DAG.getConstant(0, MVT::i32),
752 DAG.getConstant(1, MVT::i32),
753 DAG.getConstant(2, MVT::i32),
754 DAG.getConstant(3, MVT::i32),
755 Op.getOperand(2),
756 Op.getOperand(3),
757 Op.getOperand(4),
758 DAG.getConstant(0, MVT::i32),
759 DAG.getConstant(1, MVT::i32),
760 DAG.getConstant(2, MVT::i32),
761 DAG.getConstant(3, MVT::i32),
762 Op.getOperand(5),
763 Op.getOperand(6),
764 Op.getOperand(7),
765 Op.getOperand(8),
766 Op.getOperand(9),
767 Op.getOperand(10)
768 };
Craig Topper48d114b2014-04-26 18:35:24 +0000769 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000770 }
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000771 case AMDGPUIntrinsic::AMDGPU_dp4: {
772 SDValue Args[8] = {
773 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
774 DAG.getConstant(0, MVT::i32)),
775 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
776 DAG.getConstant(0, MVT::i32)),
777 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
778 DAG.getConstant(1, MVT::i32)),
779 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
780 DAG.getConstant(1, MVT::i32)),
781 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
782 DAG.getConstant(2, MVT::i32)),
783 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
784 DAG.getConstant(2, MVT::i32)),
785 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
786 DAG.getConstant(3, MVT::i32)),
787 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
788 DAG.getConstant(3, MVT::i32))
789 };
Craig Topper48d114b2014-04-26 18:35:24 +0000790 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000791 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000792
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000793 case Intrinsic::r600_read_ngroups_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000794 return LowerImplicitParameter(DAG, VT, DL, 0);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000795 case Intrinsic::r600_read_ngroups_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000796 return LowerImplicitParameter(DAG, VT, DL, 1);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000797 case Intrinsic::r600_read_ngroups_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000798 return LowerImplicitParameter(DAG, VT, DL, 2);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000799 case Intrinsic::r600_read_global_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000800 return LowerImplicitParameter(DAG, VT, DL, 3);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000801 case Intrinsic::r600_read_global_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000802 return LowerImplicitParameter(DAG, VT, DL, 4);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000803 case Intrinsic::r600_read_global_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000804 return LowerImplicitParameter(DAG, VT, DL, 5);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000805 case Intrinsic::r600_read_local_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000806 return LowerImplicitParameter(DAG, VT, DL, 6);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000807 case Intrinsic::r600_read_local_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000808 return LowerImplicitParameter(DAG, VT, DL, 7);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000809 case Intrinsic::r600_read_local_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000810 return LowerImplicitParameter(DAG, VT, DL, 8);
811
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000812 case Intrinsic::r600_read_tgid_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000813 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
814 AMDGPU::T1_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000815 case Intrinsic::r600_read_tgid_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000816 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
817 AMDGPU::T1_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000818 case Intrinsic::r600_read_tgid_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000819 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
820 AMDGPU::T1_Z, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000821 case Intrinsic::r600_read_tidig_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000822 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
823 AMDGPU::T0_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000824 case Intrinsic::r600_read_tidig_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000825 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
826 AMDGPU::T0_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000827 case Intrinsic::r600_read_tidig_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000828 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
829 AMDGPU::T0_Z, VT);
Matt Arsenault257d48d2014-06-24 22:13:39 +0000830 case Intrinsic::AMDGPU_rsq:
831 // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
832 return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000833 }
834 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
835 break;
836 }
837 } // end switch(Op.getOpcode())
838 return SDValue();
839}
840
841void R600TargetLowering::ReplaceNodeResults(SDNode *N,
842 SmallVectorImpl<SDValue> &Results,
843 SelectionDAG &DAG) const {
844 switch (N->getOpcode()) {
Matt Arsenaultd125d742014-03-27 17:23:24 +0000845 default:
846 AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
847 return;
Jan Vesely2cb62ce2014-07-10 22:40:21 +0000848 case ISD::FP_TO_UINT:
849 if (N->getValueType(0) == MVT::i1) {
850 Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
851 return;
852 }
853 // Fall-through. Since we don't care about out of bounds values
854 // we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint
855 // considers some extra cases which are not necessary here.
856 case ISD::FP_TO_SINT: {
857 SDValue Result;
858 if (expandFP_TO_SINT(N, Result, DAG))
859 Results.push_back(Result);
Tom Stellard365366f2013-01-23 02:09:06 +0000860 return;
Jan Vesely2cb62ce2014-07-10 22:40:21 +0000861 }
Jan Vesely343cd6f02014-06-22 21:43:01 +0000862 case ISD::UDIV: {
863 SDValue Op = SDValue(N, 0);
864 SDLoc DL(Op);
865 EVT VT = Op.getValueType();
866 SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT),
867 N->getOperand(0), N->getOperand(1));
868 Results.push_back(UDIVREM);
869 break;
870 }
871 case ISD::UREM: {
872 SDValue Op = SDValue(N, 0);
873 SDLoc DL(Op);
874 EVT VT = Op.getValueType();
875 SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT),
876 N->getOperand(0), N->getOperand(1));
877 Results.push_back(UDIVREM.getValue(1));
878 break;
879 }
880 case ISD::SDIV: {
881 SDValue Op = SDValue(N, 0);
882 SDLoc DL(Op);
883 EVT VT = Op.getValueType();
884 SDValue SDIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(VT, VT),
885 N->getOperand(0), N->getOperand(1));
886 Results.push_back(SDIVREM);
887 break;
888 }
889 case ISD::SREM: {
890 SDValue Op = SDValue(N, 0);
891 SDLoc DL(Op);
892 EVT VT = Op.getValueType();
893 SDValue SDIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(VT, VT),
894 N->getOperand(0), N->getOperand(1));
895 Results.push_back(SDIVREM.getValue(1));
896 break;
897 }
898 case ISD::SDIVREM: {
899 SDValue Op = SDValue(N, 1);
900 SDValue RES = LowerSDIVREM(Op, DAG);
901 Results.push_back(RES);
902 Results.push_back(RES.getValue(1));
903 break;
904 }
905 case ISD::UDIVREM: {
906 SDValue Op = SDValue(N, 0);
907 SDLoc DL(Op);
908 EVT VT = Op.getValueType();
909 EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
910
911 SDValue one = DAG.getConstant(1, HalfVT);
912 SDValue zero = DAG.getConstant(0, HalfVT);
913
914 //HiLo split
915 SDValue LHS = N->getOperand(0);
916 SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, zero);
917 SDValue LHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, one);
918
919 SDValue RHS = N->getOperand(1);
920 SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, zero);
921 SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, one);
922
923 // Get Speculative values
924 SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo);
925 SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo);
926
927 SDValue REM_Hi = zero;
928 SDValue REM_Lo = DAG.getSelectCC(DL, RHS_Hi, zero, REM_Part, LHS_Hi, ISD::SETEQ);
929
930 SDValue DIV_Hi = DAG.getSelectCC(DL, RHS_Hi, zero, DIV_Part, zero, ISD::SETEQ);
931 SDValue DIV_Lo = zero;
932
933 const unsigned halfBitWidth = HalfVT.getSizeInBits();
934
935 for (unsigned i = 0; i < halfBitWidth; ++i) {
936 SDValue POS = DAG.getConstant(halfBitWidth - i - 1, HalfVT);
937 // Get Value of high bit
938 SDValue HBit;
939 if (halfBitWidth == 32 && Subtarget->hasBFE()) {
940 HBit = DAG.getNode(AMDGPUISD::BFE_U32, DL, HalfVT, LHS_Lo, POS, one);
941 } else {
942 HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS);
943 HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one);
944 }
945
946 SDValue Carry = DAG.getNode(ISD::SRL, DL, HalfVT, REM_Lo,
947 DAG.getConstant(halfBitWidth - 1, HalfVT));
948 REM_Hi = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Hi, one);
949 REM_Hi = DAG.getNode(ISD::OR, DL, HalfVT, REM_Hi, Carry);
950
951 REM_Lo = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Lo, one);
952 REM_Lo = DAG.getNode(ISD::OR, DL, HalfVT, REM_Lo, HBit);
953
954
955 SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
956
957 SDValue BIT = DAG.getConstant(1 << (halfBitWidth - i - 1), HalfVT);
958 SDValue realBIT = DAG.getSelectCC(DL, REM, RHS, BIT, zero, ISD::SETGE);
959
960 DIV_Lo = DAG.getNode(ISD::OR, DL, HalfVT, DIV_Lo, realBIT);
961
962 // Update REM
963
964 SDValue REM_sub = DAG.getNode(ISD::SUB, DL, VT, REM, RHS);
965
966 REM = DAG.getSelectCC(DL, REM, RHS, REM_sub, REM, ISD::SETGE);
967 REM_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, zero);
968 REM_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, one);
969 }
970
971 SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
972 SDValue DIV = DAG.getNode(ISD::BUILD_PAIR, DL, VT, DIV_Lo, DIV_Hi);
973 Results.push_back(DIV);
974 Results.push_back(REM);
975 break;
976 }
977 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000978}
979
Tom Stellard880a80a2014-06-17 16:53:14 +0000980SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
981 SDValue Vector) const {
982
983 SDLoc DL(Vector);
984 EVT VecVT = Vector.getValueType();
985 EVT EltVT = VecVT.getVectorElementType();
986 SmallVector<SDValue, 8> Args;
987
988 for (unsigned i = 0, e = VecVT.getVectorNumElements();
989 i != e; ++i) {
990 Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
991 Vector, DAG.getConstant(i, getVectorIdxTy())));
992 }
993
994 return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
995}
996
997SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
998 SelectionDAG &DAG) const {
999
1000 SDLoc DL(Op);
1001 SDValue Vector = Op.getOperand(0);
1002 SDValue Index = Op.getOperand(1);
1003
1004 if (isa<ConstantSDNode>(Index) ||
1005 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
1006 return Op;
1007
1008 Vector = vectorToVerticalVector(DAG, Vector);
1009 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
1010 Vector, Index);
1011}
1012
1013SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
1014 SelectionDAG &DAG) const {
1015 SDLoc DL(Op);
1016 SDValue Vector = Op.getOperand(0);
1017 SDValue Value = Op.getOperand(1);
1018 SDValue Index = Op.getOperand(2);
1019
1020 if (isa<ConstantSDNode>(Index) ||
1021 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
1022 return Op;
1023
1024 Vector = vectorToVerticalVector(DAG, Vector);
1025 SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
1026 Vector, Value, Index);
1027 return vectorToVerticalVector(DAG, Insert);
1028}
1029
Vincent Lejeuneb55940c2013-07-09 15:03:11 +00001030SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
1031 // On hw >= R700, COS/SIN input must be between -1. and 1.
1032 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
1033 EVT VT = Op.getValueType();
1034 SDValue Arg = Op.getOperand(0);
1035 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
1036 DAG.getNode(ISD::FADD, SDLoc(Op), VT,
1037 DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
1038 DAG.getConstantFP(0.15915494309, MVT::f32)),
1039 DAG.getConstantFP(0.5, MVT::f32)));
1040 unsigned TrigNode;
1041 switch (Op.getOpcode()) {
1042 case ISD::FCOS:
1043 TrigNode = AMDGPUISD::COS_HW;
1044 break;
1045 case ISD::FSIN:
1046 TrigNode = AMDGPUISD::SIN_HW;
1047 break;
1048 default:
1049 llvm_unreachable("Wrong trig opcode");
1050 }
1051 SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
1052 DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
1053 DAG.getConstantFP(-0.5, MVT::f32)));
1054 if (Gen >= AMDGPUSubtarget::R700)
1055 return TrigVal;
1056 // On R600 hw, COS/SIN input must be between -Pi and Pi.
1057 return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
1058 DAG.getConstantFP(3.14159265359, MVT::f32));
1059}
1060
/// Lower SHL_PARTS: shift a double-width value, given as two parts of the
/// type VT (Lo = operand 0, Hi = operand 1), left by Shift (operand 2), and
/// return the result parts as a (Lo, Hi) MERGE_VALUES pair.
SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, VT);
  SDValue One = DAG.getConstant(1, VT);

  SDValue Width = DAG.getConstant(VT.getSizeInBits(), VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT);
  // BigShift: the effective shift of Lo into Hi when Shift >= Width.
  SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps, the alternative is to
  // add a conditional to filter the special case.

  // Overflow = Lo >> (Width - Shift): the Lo bits that spill into Hi,
  // computed as (Lo >> (Width1 - Shift)) >> 1 to keep the amount in range.
  SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
  Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);

  // Result parts for the Shift < Width case.
  SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
  HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
  SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);

  // Result parts for Shift >= Width: Hi comes entirely from Lo, Lo is 0.
  SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
  SDValue LoBig = Zero;

  // Select between the two cases with an unsigned Shift < Width compare.
  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
}
1096
/// Lower SRL_PARTS/SRA_PARTS: shift a double-width value, given as two
/// parts of the type VT (Lo = operand 0, Hi = operand 1), right by Shift
/// (operand 2).  For SRA_PARTS the high part is shifted arithmetically.
/// Returns the result parts as a (Lo, Hi) MERGE_VALUES pair.
SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, VT);
  SDValue One = DAG.getConstant(1, VT);

  // Arithmetic (sign-propagating) variant?
  const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;

  SDValue Width = DAG.getConstant(VT.getSizeInBits(), VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT);
  // BigShift: the effective shift of Hi into Lo when Shift >= Width.
  SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps, the alternative is to
  // add a conditional to filter the special case.

  // Overflow = Hi << (Width - Shift): the Hi bits that move into Lo,
  // computed as (Hi << (Width1 - Shift)) << 1 to keep the amount in range.
  SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
  Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);

  // Result parts for the Shift < Width case; only Hi needs SRA vs SRL.
  SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
  SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
  LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);

  // Result parts for Shift >= Width: Lo comes entirely from Hi, and Hi is
  // either the replicated sign bit (SRA) or zero (SRL).
  SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
  SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;

  // Select between the two cases with an unsigned Shift < Width compare.
  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
}
1134
Tom Stellard75aadc22012-12-11 21:25:42 +00001135SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
1136 return DAG.getNode(
1137 ISD::SETCC,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001138 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +00001139 MVT::i1,
1140 Op, DAG.getConstantFP(0.0f, MVT::f32),
1141 DAG.getCondCode(ISD::SETNE)
1142 );
1143}
1144
Tom Stellard75aadc22012-12-11 21:25:42 +00001145SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001146 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001147 unsigned DwordOffset) const {
1148 unsigned ByteOffset = DwordOffset * 4;
1149 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +00001150 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +00001151
1152 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
1153 assert(isInt<16>(ByteOffset));
1154
1155 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
1156 DAG.getConstant(ByteOffset, MVT::i32), // PTR
1157 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
1158 false, false, false, 0);
1159}
1160
Tom Stellard75aadc22012-12-11 21:25:42 +00001161bool R600TargetLowering::isZero(SDValue Op) const {
1162 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
1163 return Cst->isNullValue();
1164 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
1165 return CstFP->isZero();
1166 } else {
1167 return false;
1168 }
1169}
1170
/// Custom lowering for SELECT_CC.  Tries, in order, to massage the node
/// into a form matching a native SET* instruction, then a native CND*
/// instruction; failing both, lowers the compare into a SET* producing a
/// hardware boolean and selects on that with a second SELECT_CC.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand: if the hardware
  // true value sits in the False slot, invert the condition (or invert and
  // swap the compare operands) so the values land in the SET* pattern order.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
     ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      // The inverted condition is not directly legal; try its operand-swapped
      // form, which also requires swapping LHS/RHS.
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        // Inverting the condition also exchanges the select arms.
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      // Turn the not-equal conditions into their (inverse) equal forms and
      // swap the select arms to compensate.
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
1305
Alp Tokercb402912014-01-24 17:20:08 +00001306/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001307/// convert these pointers to a register index. Each register holds
1308/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
1309/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
1310/// for indirect addressing.
1311SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1312 unsigned StackWidth,
1313 SelectionDAG &DAG) const {
1314 unsigned SRLPad;
1315 switch(StackWidth) {
1316 case 1:
1317 SRLPad = 2;
1318 break;
1319 case 2:
1320 SRLPad = 3;
1321 break;
1322 case 4:
1323 SRLPad = 4;
1324 break;
1325 default: llvm_unreachable("Invalid stack width");
1326 }
1327
Andrew Trickef9de2a2013-05-25 02:42:55 +00001328 return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001329 DAG.getConstant(SRLPad, MVT::i32));
1330}
1331
1332void R600TargetLowering::getStackAddress(unsigned StackWidth,
1333 unsigned ElemIdx,
1334 unsigned &Channel,
1335 unsigned &PtrIncr) const {
1336 switch (StackWidth) {
1337 default:
1338 case 1:
1339 Channel = 0;
1340 if (ElemIdx > 0) {
1341 PtrIncr = 1;
1342 } else {
1343 PtrIncr = 0;
1344 }
1345 break;
1346 case 2:
1347 Channel = ElemIdx % 2;
1348 if (ElemIdx == 2) {
1349 PtrIncr = 1;
1350 } else {
1351 PtrIncr = 0;
1352 }
1353 break;
1354 case 4:
1355 Channel = ElemIdx;
1356 PtrIncr = 0;
1357 break;
1358 }
1359}
1360
/// Custom lowering for STORE nodes.  Global truncating stores are packed
/// into a masked-OR write (STORE_MSKOR); other global stores get a dword
/// pointer; private (stack) stores are lowered to REGISTER_STORE nodes via
/// register-index addressing.
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Ptr = Op.getOperand(2);

  // Give the generic AMDGPU lowering the first chance to handle this store.
  SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
  if (Result.getNode()) {
    return Result;
  }

  if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
    if (StoreNode->isTruncatingStore()) {
      // Sub-dword global store: emit a read-modify-write style STORE_MSKOR
      // with the value shifted to its byte lane and a matching byte mask.
      EVT VT = Value.getValueType();
      assert(VT.bitsLE(MVT::i32));
      EVT MemVT = StoreNode->getMemoryVT();
      SDValue MaskConstant;
      if (MemVT == MVT::i8) {
        MaskConstant = DAG.getConstant(0xFF, MVT::i32);
      } else {
        assert(MemVT == MVT::i16);
        MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
      }
      // Split the byte pointer into a dword address and a byte lane index.
      SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
                                      DAG.getConstant(2, MVT::i32));
      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
                                      DAG.getConstant(0x00000003, VT));
      SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
      // Shift amount in bits: byte index * 8.
      SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
                                  DAG.getConstant(3, VT));
      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
      // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
      // vector instead.
      SDValue Src[4] = {
        ShiftedValue,
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(0, MVT::i32),
        Mask
      };
      SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src);
      SDValue Args[3] = { Chain, Input, DWordAddr };
      return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
                                     Op->getVTList(), Args, MemVT,
                                     StoreNode->getMemOperand());
    } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
               Value.getValueType().bitsGE(MVT::i32)) {
      // Convert pointer from byte address to dword address.
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
                        DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
                                    Ptr, DAG.getConstant(2, MVT::i32)));

      if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
        llvm_unreachable("Truncated and indexed stores not supported yet");
      } else {
        Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
      }
      return Chain;
    }
  }

  EVT ValueVT = Value.getValueType();

  // Everything below handles only private (stack) stores.
  if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
  if (Ret.getNode()) {
    return Ret;
  }
  // Lowering for indirect addressing

  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering *>(
      getTargetMachine().getSubtargetImpl()->getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  // Turn the byte pointer into a register index (see stackPtrToRegIndex).
  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (ValueVT.isVector()) {
    // Store each vector element as its own REGISTER_STORE, using
    // getStackAddress to pick the channel and register increment, then tie
    // the stores together with a TokenFactor.
    unsigned NumElemVT = ValueVT.getVectorNumElements();
    EVT ElemVT = ValueVT.getVectorElementType();
    SmallVector<SDValue, 4> Stores(NumElemVT);

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
                                 Value, DAG.getConstant(i, MVT::i32));

      Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
                              Chain, Elem, Ptr,
                              DAG.getTargetConstant(Channel, MVT::i32));
    }
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
  } else {
    // Scalar store: widen i8 values to i32 first.
    if (ValueVT == MVT::i8) {
      Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
    }
    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
                        DAG.getTargetConstant(0, MVT::i32)); // Channel
  }

  return Chain;
}
1473
Tom Stellard365366f2013-01-23 02:09:06 +00001474// return (512 + (kc_bank << 12)
1475static int
1476ConstantAddressBlock(unsigned AddressSpace) {
1477 switch (AddressSpace) {
1478 case AMDGPUAS::CONSTANT_BUFFER_0:
1479 return 512;
1480 case AMDGPUAS::CONSTANT_BUFFER_1:
1481 return 512 + 4096;
1482 case AMDGPUAS::CONSTANT_BUFFER_2:
1483 return 512 + 4096 * 2;
1484 case AMDGPUAS::CONSTANT_BUFFER_3:
1485 return 512 + 4096 * 3;
1486 case AMDGPUAS::CONSTANT_BUFFER_4:
1487 return 512 + 4096 * 4;
1488 case AMDGPUAS::CONSTANT_BUFFER_5:
1489 return 512 + 4096 * 5;
1490 case AMDGPUAS::CONSTANT_BUFFER_6:
1491 return 512 + 4096 * 6;
1492 case AMDGPUAS::CONSTANT_BUFFER_7:
1493 return 512 + 4096 * 7;
1494 case AMDGPUAS::CONSTANT_BUFFER_8:
1495 return 512 + 4096 * 8;
1496 case AMDGPUAS::CONSTANT_BUFFER_9:
1497 return 512 + 4096 * 9;
1498 case AMDGPUAS::CONSTANT_BUFFER_10:
1499 return 512 + 4096 * 10;
1500 case AMDGPUAS::CONSTANT_BUFFER_11:
1501 return 512 + 4096 * 11;
1502 case AMDGPUAS::CONSTANT_BUFFER_12:
1503 return 512 + 4096 * 12;
1504 case AMDGPUAS::CONSTANT_BUFFER_13:
1505 return 512 + 4096 * 13;
1506 case AMDGPUAS::CONSTANT_BUFFER_14:
1507 return 512 + 4096 * 14;
1508 case AMDGPUAS::CONSTANT_BUFFER_15:
1509 return 512 + 4096 * 15;
1510 default:
1511 return -1;
1512 }
1513}
1514
/// Custom lowering for ISD::LOAD on R600.
///
/// Tries, in order: the generic AMDGPU lowering, constant-address-space
/// global-variable loads, scalarization of local-address vector loads,
/// constant-buffer (kc bank) loads, manual expansion of SEXT loads, and
/// finally indirect (private/stack) addressing via REGISTER_LOAD.
/// Returns SDValue() for address spaces not handled here so the
/// legalizer/other lowering can deal with them.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  // Let the generic AMDGPU lowering take the first shot; if it produced a
  // value, merge it with the original chain (a load has two results).
  SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
  if (Ret.getNode()) {
    SDValue Ops[2] = {
      Ret,
      Chain
    };
    return DAG.getMergeValues(Ops, DL);
  }

  // Lower constant-address-space loads whose underlying object is a global
  // variable: convert the byte address to a dword register index (shift
  // right by 2) and emit a REGISTER_LOAD.
  if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
      isa<GlobalVariable>(
          GetUnderlyingObject(LoadNode->getMemOperand()->getValue()))) {

    // NOTE: this inner Ptr intentionally shadows the outer one.
    SDValue Ptr = DAG.getZExtOrTrunc(LoadNode->getBasePtr(), DL,
        getPointerTy(AMDGPUAS::PRIVATE_ADDRESS));
    Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
        DAG.getConstant(2, MVT::i32));
    return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op->getVTList(),
                       LoadNode->getChain(), Ptr,
                       DAG.getTargetConstant(0, MVT::i32), Op.getOperand(2));
  }

  // Vector loads from local memory are split into scalar loads.
  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      ScalarizeVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Constant-buffer loads. Only non-extending and zero-extending loads can
  // use the constant cache directly (data is stored zero-extended there).
  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
        isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      // Gather the (up to four) channel loads back into a vector.
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT,
                           makeArrayRef(Slots, NumElements));
    } else {
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    // Scalar loads use only channel 0 of the fetched group.
    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    // Expand sextload as extload followed by shl/sra to replicate the sign
    // bit into the high bits.
    SDValue ShiftAmount =
          DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                  LoadNode->getPointerInfo(), MemVT,
                                  LoadNode->isVolatile(),
                                  LoadNode->isNonTemporal(),
                                  LoadNode->isInvariant(),
                                  LoadNode->getAlignment());
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);

    SDValue MergedValues[2] = { Sra, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Everything past this point handles only the private (stack) address
  // space; anything else is left for default handling.
  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering *>(
      getTargetMachine().getSubtargetImpl()->getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  // Convert the byte pointer into a stack-register index.
  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    // One REGISTER_LOAD per element, each at its (channel, register) slot.
    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, MVT::i32),
                             Op.getOperand(2));
    }
    // Pad unused lanes with undef so we always build a 4-wide vector.
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2] = {
    LoweredLoad,
    Chain
  };

  return DAG.getMergeValues(Ops, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001677
Matt Arsenault1d555c42014-06-23 18:00:55 +00001678SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1679 SDValue Chain = Op.getOperand(0);
1680 SDValue Cond = Op.getOperand(1);
1681 SDValue Jump = Op.getOperand(2);
1682
1683 return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1684 Chain, Jump, Cond);
1685}
1686
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
///
/// Non-compute shaders receive their inputs in live-in 128-bit registers;
/// compute kernels read them from constant buffer 0, after a 36-byte header
/// describing thread-group/global sizes.
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      SDLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned ShaderType = MF.getInfo<R600MachineFunctionInfo>()->getShaderType();

  SmallVector<ISD::InputArg, 8> LocalIns;

  // Recover the pre-legalization argument list so the CC analysis sees the
  // original (possibly illegal) types.
  getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);

  AnalyzeFormalArguments(CCInfo, LocalIns);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    const ISD::InputArg &In = Ins[i];
    EVT VT = In.VT;
    EVT MemVT = VA.getLocVT();
    if (!VT.isVector() && MemVT.isVector()) {
      // Get load source type if scalarized.
      MemVT = MemVT.getVectorElementType();
    }

    // Graphics shaders: arguments arrive in registers, no memory traffic.
    if (ShaderType != ShaderType::COMPUTE) {
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    // Compute kernels: each argument is loaded from constant buffer 0.
    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
    // being invalid. Somehow this seems to work with i64 arguments, but breaks
    // for <1 x i64>.

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.
    ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
    if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
      // FIXME: This should really check the extload type, but the handling of
      // extload vector parameters seems to be broken.

      // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
      Ext = ISD::SEXTLOAD;
    }

    // Compute the offset from the value.
    // XXX - I think PartOffset should give you this, but it seems to give the
    // size of the register which isn't useful.

    unsigned ValBase = ArgLocs[In.OrigArgIndex].getLocMemOffset();
    unsigned PartOffset = VA.getLocMemOffset();

    // Pointer value is undef since there is no real pointer; the offset
    // (36-byte header + PartOffset) addresses into constant buffer 0.
    MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
    SDValue Arg = DAG.getLoad(ISD::UNINDEXED, Ext, VT, DL, Chain,
                              DAG.getConstant(36 + PartOffset, MVT::i32),
                              DAG.getUNDEF(MVT::i32),
                              PtrInfo,
                              MemVT, false, true, true, 4);

    // 4 is the preferred alignment for the CONSTANT memory space.
    InVals.push_back(Arg);
  }
  return Chain;
}
1764
Matt Arsenault758659232013-05-18 00:21:46 +00001765EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001766 if (!VT.isVector())
1767 return MVT::i32;
Tom Stellard75aadc22012-12-11 21:25:42 +00001768 return VT.changeVectorElementTypeToInteger();
1769}
1770
Matt Arsenault209a7b92014-04-18 07:40:20 +00001771static SDValue CompactSwizzlableVector(
1772 SelectionDAG &DAG, SDValue VectorEntry,
1773 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001774 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1775 assert(RemapSwizzle.empty());
1776 SDValue NewBldVec[4] = {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001777 VectorEntry.getOperand(0),
1778 VectorEntry.getOperand(1),
1779 VectorEntry.getOperand(2),
1780 VectorEntry.getOperand(3)
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001781 };
1782
1783 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeunefa58a5f2013-10-13 17:56:10 +00001784 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1785 // We mask write here to teach later passes that the ith element of this
1786 // vector is undef. Thus we can use it to reduce 128 bits reg usage,
1787 // break false dependencies and additionnaly make assembly easier to read.
1788 RemapSwizzle[i] = 7; // SEL_MASK_WRITE
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001789 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1790 if (C->isZero()) {
1791 RemapSwizzle[i] = 4; // SEL_0
1792 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1793 } else if (C->isExactlyValue(1.0)) {
1794 RemapSwizzle[i] = 5; // SEL_1
1795 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1796 }
1797 }
1798
1799 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1800 continue;
1801 for (unsigned j = 0; j < i; j++) {
1802 if (NewBldVec[i] == NewBldVec[j]) {
1803 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1804 RemapSwizzle[i] = j;
1805 break;
1806 }
1807 }
1808 }
1809
1810 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001811 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001812}
1813
Benjamin Kramer193960c2013-06-11 13:32:25 +00001814static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1815 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001816 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1817 assert(RemapSwizzle.empty());
1818 SDValue NewBldVec[4] = {
1819 VectorEntry.getOperand(0),
1820 VectorEntry.getOperand(1),
1821 VectorEntry.getOperand(2),
1822 VectorEntry.getOperand(3)
1823 };
1824 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001825 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001826 RemapSwizzle[i] = i;
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001827 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1828 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1829 ->getZExtValue();
1830 if (i == Idx)
1831 isUnmovable[Idx] = true;
1832 }
1833 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001834
1835 for (unsigned i = 0; i < 4; i++) {
1836 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1837 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1838 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001839 if (isUnmovable[Idx])
1840 continue;
1841 // Swap i and Idx
1842 std::swap(NewBldVec[Idx], NewBldVec[i]);
1843 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1844 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001845 }
1846 }
1847
1848 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001849 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001850}
1851
1852
1853SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1854SDValue Swz[4], SelectionDAG &DAG) const {
1855 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1856 // Old -> New swizzle values
1857 DenseMap<unsigned, unsigned> SwizzleRemap;
1858
1859 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1860 for (unsigned i = 0; i < 4; i++) {
1861 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1862 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1863 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1864 }
1865
1866 SwizzleRemap.clear();
1867 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1868 for (unsigned i = 0; i < 4; i++) {
1869 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1870 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1871 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1872 }
1873
1874 return BuildVector;
1875}
1876
1877
Tom Stellard75aadc22012-12-11 21:25:42 +00001878//===----------------------------------------------------------------------===//
1879// Custom DAG Optimizations
1880//===----------------------------------------------------------------------===//
1881
/// R600-specific DAG combines; anything not matched here is forwarded to
/// the generic AMDGPU combines (via the default case or the fall-through
/// return at the end).
SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
  case ISD::FP_ROUND: {
      SDValue Arg = N->getOperand(0);
      if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
        return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
                           Arg.getOperand(0));
      }
      break;
    }

  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
  // (i32 select_cc f32, f32, -1, 0 cc)
  //
  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
  // this to one of the SET*_DX10 instructions.
  case ISD::FP_TO_SINT: {
    SDValue FNeg = N->getOperand(0);
    if (FNeg.getOpcode() != ISD::FNEG) {
      return SDValue();
    }
    SDValue SelectCC = FNeg.getOperand(0);
    if (SelectCC.getOpcode() != ISD::SELECT_CC ||
        SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
        SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
        !isHWTrueValue(SelectCC.getOperand(2)) ||
        !isHWFalseValue(SelectCC.getOperand(3))) {
      return SDValue();
    }

    return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
                           SelectCC.getOperand(0), // LHS
                           SelectCC.getOperand(1), // RHS
                           DAG.getConstant(-1, MVT::i32), // True
                           DAG.getConstant(0, MVT::i32), // Flase
                           SelectCC.getOperand(4)); // CC

    // NOTE(review): unreachable — the return above always fires.
    break;
  }

  // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
  // => build_vector elt0, ... , NewEltIdx, ... , eltN
  case ISD::INSERT_VECTOR_ELT: {
    SDValue InVec = N->getOperand(0);
    SDValue InVal = N->getOperand(1);
    SDValue EltNo = N->getOperand(2);
    SDLoc dl(N);

    // If the inserted element is an UNDEF, just use the input vector.
    if (InVal.getOpcode() == ISD::UNDEF)
      return InVec;

    EVT VT = InVec.getValueType();

    // If we can't generate a legal BUILD_VECTOR, exit
    if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
      return SDValue();

    // Check that we know which element is being inserted
    if (!isa<ConstantSDNode>(EltNo))
      return SDValue();
    unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

    // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
    // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
    // vector elements.
    SmallVector<SDValue, 8> Ops;
    if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
      Ops.append(InVec.getNode()->op_begin(),
                 InVec.getNode()->op_end());
    } else if (InVec.getOpcode() == ISD::UNDEF) {
      unsigned NElts = VT.getVectorNumElements();
      Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
    } else {
      return SDValue();
    }

    // Insert the element
    if (Elt < Ops.size()) {
      // All the operands of BUILD_VECTOR must have the same type;
      // we enforce that here.
      EVT OpVT = Ops[0].getValueType();
      if (InVal.getValueType() != OpVT)
        InVal = OpVT.bitsGT(InVal.getValueType()) ?
          DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
          DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
      Ops[Elt] = InVal;
    }

    // Return the new vector
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
  }

  // Extract_vec (Build_vector) generated by custom lowering
  // also needs to be customly combined
  case ISD::EXTRACT_VECTOR_ELT: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return Arg->getOperand(Element);
      }
    }
    if (Arg.getOpcode() == ISD::BITCAST &&
        Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
            Arg->getOperand(0).getOperand(Element));
      }
    }
  }
  // NOTE(review): no break above — an EXTRACT_VECTOR_ELT that is not
  // combined falls through into the SELECT_CC case below; confirm this
  // fallthrough is intentional.

  case ISD::SELECT_CC: {
    // Try common optimizations
    SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
    if (Ret.getNode())
      return Ret;

    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
    //      selectcc x, y, a, b, inv(cc)
    //
    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
    //      selectcc x, y, a, b, cc
    SDValue LHS = N->getOperand(0);
    if (LHS.getOpcode() != ISD::SELECT_CC) {
      return SDValue();
    }

    SDValue RHS = N->getOperand(1);
    SDValue True = N->getOperand(2);
    SDValue False = N->getOperand(3);
    ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();

    // The inner select's true/false values must line up with the outer
    // select's operands for the fold to be valid.
    if (LHS.getOperand(2).getNode() != True.getNode() ||
        LHS.getOperand(3).getNode() != False.getNode() ||
        RHS.getNode() != False.getNode()) {
      return SDValue();
    }

    switch (NCC) {
    default: return SDValue();
    case ISD::SETNE: return LHS;
    case ISD::SETEQ: {
      ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
      LHSCC = ISD::getSetCCInverse(LHSCC,
                                  LHS.getOperand(0).getValueType().isInteger());
      if (DCI.isBeforeLegalizeOps() ||
          isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
        return DAG.getSelectCC(SDLoc(N),
                               LHS.getOperand(0),
                               LHS.getOperand(1),
                               LHS.getOperand(2),
                               LHS.getOperand(3),
                               LHSCC);
      break;
    }
    }
    return SDValue();
  }

  case AMDGPUISD::EXPORT: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    // Rebuild the EXPORT with a swizzle-optimized source vector; operand 1
    // is filled in by OptimizeSwizzle, which also rewrites SWZ_X..SWZ_W.
    SDValue NewArgs[8] = {
      N->getOperand(0), // Chain
      SDValue(),
      N->getOperand(2), // ArrayBase
      N->getOperand(3), // Type
      N->getOperand(4), // SWZ_X
      N->getOperand(5), // SWZ_Y
      N->getOperand(6), // SWZ_Z
      N->getOperand(7) // SWZ_W
    };
    SDLoc DL(N);
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
    return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
  }
  case AMDGPUISD::TEXTURE_FETCH: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    // Same idea as EXPORT: operands 2..5 hold the swizzle selectors that
    // OptimizeSwizzle rewrites alongside the source vector in operand 1.
    SDValue NewArgs[19] = {
      N->getOperand(0),
      N->getOperand(1),
      N->getOperand(2),
      N->getOperand(3),
      N->getOperand(4),
      N->getOperand(5),
      N->getOperand(6),
      N->getOperand(7),
      N->getOperand(8),
      N->getOperand(9),
      N->getOperand(10),
      N->getOperand(11),
      N->getOperand(12),
      N->getOperand(13),
      N->getOperand(14),
      N->getOperand(15),
      N->getOperand(16),
      N->getOperand(17),
      N->getOperand(18),
    };
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
    return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
        NewArgs);
  }
  }

  return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002101
/// Try to fold the machine node feeding operand SrcIdx of ParentNode into
/// the parent's operand fields after instruction selection:
///  - FNEG_R600/FABS_R600 fold into the neg/abs source-modifier operands;
///  - CONST_COPY folds into the constant-buffer select operand, subject to
///    the per-instruction constant-read limits;
///  - MOV_IMM_I32/MOV_IMM_F32 fold into the inline-constant registers
///    (ZERO, ONE, HALF, ONE_INT) or the single ALU_LITERAL_X slot.
/// On success, Src and the relevant by-ref operand (Neg/Abs/Sel/Imm) are
/// updated and true is returned; a null Neg/Abs/Sel/Imm node means the
/// parent has no such operand and the corresponding fold is rejected.
/// NOTE(review): SrcIdx appears unused in this body — presumably kept for
/// interface symmetry with callers; confirm before removing.
static bool
FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
            SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
  if (!Src.isMachineOpcode())
    return false;
  switch (Src.getMachineOpcode()) {
  case AMDGPU::FNEG_R600:
    if (!Neg.getNode())
      return false;
    Src = Src.getOperand(0);
    Neg = DAG.getTargetConstant(1, MVT::i32);
    return true;
  case AMDGPU::FABS_R600:
    if (!Abs.getNode())
      return false;
    Src = Src.getOperand(0);
    Abs = DAG.getTargetConstant(1, MVT::i32);
    return true;
  case AMDGPU::CONST_COPY: {
    unsigned Opcode = ParentNode->getMachineOpcode();
    // If the parent writes a dst operand, source operand indices are offset
    // by one relative to the operand list (see the HasDst adjustments below).
    bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;

    if (!Sel.getNode())
      return false;

    SDValue CstOffset = Src.getOperand(0);
    if (ParentNode->getValueType(0).isVector())
      return false;

    // Gather constants values
    int SrcIndices[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
    };
    std::vector<unsigned> Consts;
    // Collect the constant selects already used by the parent's other
    // sources so we can check the hardware's constant-read limits.
    for (int OtherSrcIdx : SrcIndices) {
      int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
      if (OtherSrcIdx < 0 || OtherSelIdx < 0)
        continue;
      if (HasDst) {
        OtherSrcIdx--;
        OtherSelIdx--;
      }
      if (RegisterSDNode *Reg =
          dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
        if (Reg->getReg() == AMDGPU::ALU_CONST) {
          ConstantSDNode *Cst
            = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
          Consts.push_back(Cst->getZExtValue());
        }
      }
    }

    ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
    Consts.push_back(Cst->getZExtValue());
    if (!TII->fitsConstReadLimitations(Consts)) {
      return false;
    }

    Sel = CstOffset;
    Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
    return true;
  }
  case AMDGPU::MOV_IMM_I32:
  case AMDGPU::MOV_IMM_F32: {
    unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
    uint64_t ImmValue = 0;


    // Prefer the hardware's inline constants (0, 0.5, 1) over a literal.
    if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
      ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
      float FloatValue = FPC->getValueAPF().convertToFloat();
      if (FloatValue == 0.0) {
        ImmReg = AMDGPU::ZERO;
      } else if (FloatValue == 0.5) {
        ImmReg = AMDGPU::HALF;
      } else if (FloatValue == 1.0) {
        ImmReg = AMDGPU::ONE;
      } else {
        ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
      }
    } else {
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
      uint64_t Value = C->getZExtValue();
      if (Value == 0) {
        ImmReg = AMDGPU::ZERO;
      } else if (Value == 1) {
        ImmReg = AMDGPU::ONE_INT;
      } else {
        ImmValue = Value;
      }
    }

    // Check that we aren't already using an immediate.
    // XXX: It's possible for an instruction to have more than one
    // immediate operand, but this is not supported yet.
    if (ImmReg == AMDGPU::ALU_LITERAL_X) {
      if (!Imm.getNode())
        return false;
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
      assert(C);
      // Reject the fold if the literal slot is already occupied.
      if (C->getZExtValue())
        return false;
      Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
    }
    Src = DAG.getRegister(ImmReg, MVT::i32);
    return true;
  }
  default:
    return false;
  }
}
2225
2226
2227/// \brief Fold the instructions after selecting them
2228SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2229 SelectionDAG &DAG) const {
Eric Christopherfc6de422014-08-05 02:39:49 +00002230 const R600InstrInfo *TII =
2231 static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002232 if (!Node->isMachineOpcode())
2233 return Node;
2234 unsigned Opcode = Node->getMachineOpcode();
2235 SDValue FakeOp;
2236
2237 std::vector<SDValue> Ops;
Craig Topper66e588b2014-06-29 00:40:57 +00002238 for (const SDUse &I : Node->ops())
2239 Ops.push_back(I);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002240
2241 if (Opcode == AMDGPU::DOT_4) {
2242 int OperandIdx[] = {
2243 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2244 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2245 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2246 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2247 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2248 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2249 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2250 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00002251 };
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002252 int NegIdx[] = {
2253 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
2254 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
2255 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
2256 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
2257 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
2258 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
2259 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
2260 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
2261 };
2262 int AbsIdx[] = {
2263 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
2264 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
2265 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
2266 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
2267 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
2268 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
2269 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
2270 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
2271 };
2272 for (unsigned i = 0; i < 8; i++) {
2273 if (OperandIdx[i] < 0)
2274 return Node;
2275 SDValue &Src = Ops[OperandIdx[i] - 1];
2276 SDValue &Neg = Ops[NegIdx[i] - 1];
2277 SDValue &Abs = Ops[AbsIdx[i] - 1];
2278 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2279 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2280 if (HasDst)
2281 SelIdx--;
2282 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002283 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2284 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2285 }
2286 } else if (Opcode == AMDGPU::REG_SEQUENCE) {
2287 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2288 SDValue &Src = Ops[i];
2289 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002290 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2291 }
Vincent Lejeune0167a312013-09-12 23:45:00 +00002292 } else if (Opcode == AMDGPU::CLAMP_R600) {
2293 SDValue Src = Node->getOperand(0);
2294 if (!Src.isMachineOpcode() ||
2295 !TII->hasInstrModifiers(Src.getMachineOpcode()))
2296 return Node;
2297 int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
2298 AMDGPU::OpName::clamp);
2299 if (ClampIdx < 0)
2300 return Node;
2301 std::vector<SDValue> Ops;
2302 unsigned NumOp = Src.getNumOperands();
2303 for(unsigned i = 0; i < NumOp; ++i)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00002304 Ops.push_back(Src.getOperand(i));
Vincent Lejeune0167a312013-09-12 23:45:00 +00002305 Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
2306 return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
2307 Node->getVTList(), Ops);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002308 } else {
2309 if (!TII->hasInstrModifiers(Opcode))
2310 return Node;
2311 int OperandIdx[] = {
2312 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2313 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2314 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
2315 };
2316 int NegIdx[] = {
2317 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
2318 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
2319 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
2320 };
2321 int AbsIdx[] = {
2322 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
2323 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
2324 -1
2325 };
2326 for (unsigned i = 0; i < 3; i++) {
2327 if (OperandIdx[i] < 0)
2328 return Node;
2329 SDValue &Src = Ops[OperandIdx[i] - 1];
2330 SDValue &Neg = Ops[NegIdx[i] - 1];
2331 SDValue FakeAbs;
2332 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2333 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2334 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002335 int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
2336 if (HasDst) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002337 SelIdx--;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002338 ImmIdx--;
2339 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002340 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002341 SDValue &Imm = Ops[ImmIdx];
2342 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002343 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2344 }
2345 }
2346
2347 return Node;
2348}