//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "AMDGPUFrameLowering.h"
#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Function.h"

using namespace llvm;

R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
  addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);

  computeRegisterProperties();

  // Set condition code actions
  setCondCodeAction(ISD::SETO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::f32, Expand);

  setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::i32, Expand);

  setOperationAction(ISD::FCOS, MVT::f32, Custom);
  setOperationAction(ISD::FSIN, MVT::f32, Custom);

  setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
  setOperationAction(ISD::SETCC, MVT::v2i32, Expand);

  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
  setOperationAction(ISD::SELECT, MVT::v4i32, Expand);

  // Expand sign extension of vectors
  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);

  // Legalize loads and stores to the private address space.
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);

  // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
  // spaces, so it is custom lowered to handle those where it isn't.
  setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
  setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
  setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);

  setOperationAction(ISD::STORE, MVT::i8, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);
  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
  setTruncStoreAction(MVT::i32, MVT::i16, Custom);

  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);

  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);

  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::FP_TO_SINT);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::SELECT_CC);
  setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);

  setOperationAction(ISD::SUB, MVT::i64, Expand);

  // These should be replaced by UDIVREM, but it does not happen automatically
  // during Type Legalization.
  setOperationAction(ISD::UDIV, MVT::i64, Custom);
  setOperationAction(ISD::UREM, MVT::i64, Custom);
  setOperationAction(ISD::SDIV, MVT::i64, Custom);
  setOperationAction(ISD::SREM, MVT::i64, Custom);

  // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
  // to be Legal/Custom in order to avoid library calls.
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);

  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
  for (MVT VT : ScalarIntVTs) {
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::SUBE, VT, Expand);
  }

  setBooleanContents(ZeroOrNegativeOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
  setSchedulingPreference(Sched::Source);
}

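// Expand pseudo-instructions that need custom MachineBasicBlock insertion:
// clamp/abs/neg source modifiers, immediate moves, cacheless RAT writes, TXD
// texture fetches with gradients, branches, exports, and RETURN live-outs.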
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(MF->getSubtarget().getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // Replace LDS_*_RET instructions that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI->getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
      // LDS_1A2D support and remove this special case.
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()) ||
          MI->getOpcode() == AMDGPU::LDS_CMPST_RET)
        return BB;

      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
        AMDGPU::MOV,
        MI->getOperand(0).getReg(),
        MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
        AMDGPU::MOV,
        MI->getOperand(0).getReg(),
        MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
        AMDGPU::MOV,
        MI->getOperand(0).getReg(),
        MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
                       MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    unsigned EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

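    // Remap the source swizzles and coordinate-type flags for the texture
    // target: Rect targets use unnormalized X/Y (CTX/CTY = 0), shadow targets
    // carry the comparison value in W, and array targets take the layer index
    // as an unnormalized coordinate.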
    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
        .addOperand(MI->getOperand(3))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
        .addOperand(MI->getOperand(2))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW)
        .addReg(T0, RegState::Implicit)
        .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
        .addOperand(MI->getOperand(3))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
        .addOperand(MI->getOperand(2))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW)
        .addReg(T0, RegState::Implicit)
        .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
        .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .addOperand(MI->getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO_INT)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .addOperand(MI->getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // The instruction is left unmodified if it's not the last one of its type.
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz) ? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addOperand(MI->getOperand(2))
        .addOperand(MI->getOperand(3))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5))
        .addOperand(MI->getOperand(6))
        .addImm(CfInst)
        .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  MI->eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
  case ISD::SRA_PARTS:
  case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
  case ISD::FCOS:
  case ISD::FSIN: return LowerTrig(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: {
    SDValue Result = LowerLOAD(Op, DAG);
    assert((!Result.getNode() ||
            Result.getNode()->getNumValues() == 2) &&
           "Load should return a value and a chain");
    return Result;
  }

  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MFI->LiveOuts.push_back(Reg);
      return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
    }
    case AMDGPUIntrinsic::R600_store_swizzle: {
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, MVT::i32), // SWZ_X
        DAG.getConstant(1, MVT::i32), // SWZ_Y
        DAG.getConstant(2, MVT::i32), // SWZ_Z
        DAG.getConstant(3, MVT::i32) // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(), Args);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch (IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      MRI.addLiveIn(Reg);
      return DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), Reg, VT);
    }

    case AMDGPUIntrinsic::R600_interp_input: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
      MachineSDNode *interp;
      if (ijb < 0) {
        const MachineFunction &MF = DAG.getMachineFunction();
        const R600InstrInfo *TII = static_cast<const R600InstrInfo *>(
            MF.getSubtarget().getInstrInfo());
        interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
            MVT::v4f32, DAG.getTargetConstant(slot / 4, MVT::i32));
        return DAG.getTargetExtractSubreg(
            TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
            DL, MVT::f32, SDValue(interp, 0));
      }
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
      unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
      MRI.addLiveIn(RegisterI);
      MRI.addLiveIn(RegisterJ);
      SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
      SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);

      if (slot % 4 < 2)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4, MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4, MVT::i32),
            RegisterJNode, RegisterINode);
      return SDValue(interp, slot % 2);
    }
    case AMDGPUIntrinsic::R600_interp_xy:
    case AMDGPUIntrinsic::R600_interp_zw: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      MachineSDNode *interp;
      SDValue RegisterINode = Op.getOperand(2);
      SDValue RegisterJNode = Op.getOperand(3);

      if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
            RegisterJNode, RegisterINode);
      return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
          SDValue(interp, 0), SDValue(interp, 1));
    }
    case AMDGPUIntrinsic::R600_tex:
    case AMDGPUIntrinsic::R600_texc:
    case AMDGPUIntrinsic::R600_txl:
    case AMDGPUIntrinsic::R600_txlc:
    case AMDGPUIntrinsic::R600_txb:
    case AMDGPUIntrinsic::R600_txbc:
    case AMDGPUIntrinsic::R600_txf:
    case AMDGPUIntrinsic::R600_txq:
    case AMDGPUIntrinsic::R600_ddx:
    case AMDGPUIntrinsic::R600_ddy:
    case AMDGPUIntrinsic::R600_ldptr: {
      unsigned TextureOp;
      switch (IntrinsicID) {
      case AMDGPUIntrinsic::R600_tex:
        TextureOp = 0;
        break;
      case AMDGPUIntrinsic::R600_texc:
        TextureOp = 1;
        break;
      case AMDGPUIntrinsic::R600_txl:
        TextureOp = 2;
        break;
      case AMDGPUIntrinsic::R600_txlc:
        TextureOp = 3;
        break;
      case AMDGPUIntrinsic::R600_txb:
        TextureOp = 4;
        break;
      case AMDGPUIntrinsic::R600_txbc:
        TextureOp = 5;
        break;
      case AMDGPUIntrinsic::R600_txf:
        TextureOp = 6;
        break;
      case AMDGPUIntrinsic::R600_txq:
        TextureOp = 7;
        break;
      case AMDGPUIntrinsic::R600_ddx:
        TextureOp = 8;
        break;
      case AMDGPUIntrinsic::R600_ddy:
        TextureOp = 9;
        break;
      case AMDGPUIntrinsic::R600_ldptr:
        TextureOp = 10;
        break;
      default:
745 llvm_unreachable("Unknow Texture Operation");
      }

      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
    }
    case AMDGPUIntrinsic::AMDGPU_dp4: {
      SDValue Args[8] = {
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(0, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(0, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(1, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(1, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(2, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(2, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(3, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(3, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
    }

    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    case Intrinsic::AMDGPU_read_workdim:
      return LowerImplicitParameter(DAG, VT, DL, MFI->ABIArgOffset / 4);

    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    case Intrinsic::AMDGPU_rsq:
      // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
      return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
    return;
  case ISD::FP_TO_UINT:
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
      return;
    }
    // Fall-through. Since we don't care about out of bounds values
    // we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint
    // considers some extra cases which are not necessary here.
  case ISD::FP_TO_SINT: {
    SDValue Result;
    if (expandFP_TO_SINT(N, Result, DAG))
      Results.push_back(Result);
    return;
  }
  case ISD::UDIV: {
    SDValue Op = SDValue(N, 0);
    SDLoc DL(Op);
    EVT VT = Op.getValueType();
    SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT),
        N->getOperand(0), N->getOperand(1));
    Results.push_back(UDIVREM);
    break;
  }
  case ISD::UREM: {
    SDValue Op = SDValue(N, 0);
    SDLoc DL(Op);
    EVT VT = Op.getValueType();
    SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT),
        N->getOperand(0), N->getOperand(1));
    Results.push_back(UDIVREM.getValue(1));
    break;
  }
  case ISD::SDIV: {
    SDValue Op = SDValue(N, 0);
    SDLoc DL(Op);
    EVT VT = Op.getValueType();
    SDValue SDIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(VT, VT),
        N->getOperand(0), N->getOperand(1));
    Results.push_back(SDIVREM);
    break;
  }
  case ISD::SREM: {
    SDValue Op = SDValue(N, 0);
    SDLoc DL(Op);
    EVT VT = Op.getValueType();
    SDValue SDIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(VT, VT),
        N->getOperand(0), N->getOperand(1));
    Results.push_back(SDIVREM.getValue(1));
    break;
  }
  case ISD::SDIVREM: {
    SDValue Op = SDValue(N, 1);
    SDValue RES = LowerSDIVREM(Op, DAG);
    Results.push_back(RES);
    Results.push_back(RES.getValue(1));
    break;
  }
  case ISD::UDIVREM: {
    SDValue Op = SDValue(N, 0);
    LowerUDIVREM64(Op, DAG, Results);
    break;
  }
  }
}

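// Rebuild a vector as an AMDGPUISD::BUILD_VERTICAL_VECTOR, which (unlike a
// plain BUILD_VECTOR) can be element-indexed with a non-constant index; the
// two callers below use it exactly for that case.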
SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
                                                   SDValue Vector) const {

  SDLoc DL(Vector);
  EVT VecVT = Vector.getValueType();
  EVT EltVT = VecVT.getVectorElementType();
  SmallVector<SDValue, 8> Args;

  for (unsigned i = 0, e = VecVT.getVectorNumElements();
       i != e; ++i) {
    Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
        Vector, DAG.getConstant(i, getVectorIdxTy())));
  }

  return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
}

SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                    SelectionDAG &DAG) const {

  SDLoc DL(Op);
  SDValue Vector = Op.getOperand(0);
  SDValue Index = Op.getOperand(1);

  if (isa<ConstantSDNode>(Index) ||
      Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    return Op;

  Vector = vectorToVerticalVector(DAG, Vector);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
                     Vector, Index);
}

SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vector = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Index = Op.getOperand(2);

  if (isa<ConstantSDNode>(Index) ||
      Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    return Op;

  Vector = vectorToVerticalVector(DAG, Vector);
  SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
                               Vector, Value, Index);
  return vectorToVerticalVector(DAG, Insert);
}

SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
  // On hw >= R700, COS/SIN input must be between -1. and 1.
  // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
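  // 0.15915494309 is 1/(2*pi): scaling by it and adding 0.5 maps the input
  // into FRACT's [0, 1) domain before it is re-centered around zero.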
  EVT VT = Op.getValueType();
  SDValue Arg = Op.getOperand(0);
  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
      DAG.getNode(ISD::FADD, SDLoc(Op), VT,
          DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
              DAG.getConstantFP(0.15915494309, MVT::f32)),
          DAG.getConstantFP(0.5, MVT::f32)));
  unsigned TrigNode;
  switch (Op.getOpcode()) {
  case ISD::FCOS:
    TrigNode = AMDGPUISD::COS_HW;
    break;
  case ISD::FSIN:
    TrigNode = AMDGPUISD::SIN_HW;
    break;
  default:
    llvm_unreachable("Wrong trig opcode");
  }
  SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
      DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
          DAG.getConstantFP(-0.5, MVT::f32)));
  if (Gen >= AMDGPUSubtarget::R700)
    return TrigVal;
  // On R600 hw, COS/SIN input must be between -Pi and Pi.
  return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
      DAG.getConstantFP(3.14159265359, MVT::f32));
}

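// Expand a 64-bit shift-left into i32 pieces:
//   Shift < 32:  Hi = (Hi << Shift) | (Lo >> (32 - Shift)), Lo = Lo << Shift
//   Shift >= 32: Hi = Lo << (Shift - 32),                   Lo = 0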
SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, VT);
  SDValue One = DAG.getConstant(1, VT);

  SDValue Width = DAG.getConstant(VT.getSizeInBits(), VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT);
  SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for the 0 special case.
  // Without it, CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps; the alternative is to
  // add a conditional to filter out the special case.

  SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
  Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);

  SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
  HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
  SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);

  SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
  SDValue LoBig = Zero;

  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Lo, Hi);
}

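// Expand a 64-bit right shift (logical, or arithmetic for SRA_PARTS) into i32
// pieces:
//   Shift < 32:  Lo = (Lo >> Shift) | (Hi << (32 - Shift)), Hi = Hi >> Shift
//   Shift >= 32: Lo = Hi >> (Shift - 32), Hi = SRA ? Hi >> 31 : 0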
SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, VT);
  SDValue One = DAG.getConstant(1, VT);

  const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;

  SDValue Width = DAG.getConstant(VT.getSizeInBits(), VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT);
  SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for the 0 special case.
  // Without it, CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps; the alternative is to
  // add a conditional to filter out the special case.

  SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
  Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);

  SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
  SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
  LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);

  SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
  SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;

  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Lo, Hi);
}

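// i1 FP_TO_UINT: any non-zero input maps to 1, so this reduces to a SETNE
// against 0.0.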
SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
  return DAG.getNode(
      ISD::SETCC,
      SDLoc(Op),
      MVT::i1,
      Op, DAG.getConstantFP(0.0f, MVT::f32),
      DAG.getCondCode(ISD::SETNE)
      );
}

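// Implicit kernel parameters (grid/group sizes, work dimension) live at fixed
// dword offsets in CONSTANT_BUFFER_0; lower reads of them to loads from there.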
SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   SDLoc DL,
                                                   unsigned DwordOffset) const {
  unsigned ByteOffset = DwordOffset * 4;
  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                           AMDGPUAS::CONSTANT_BUFFER_0);

  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)),
                     false, false, false, 0);
}

bool R600TargetLowering::isZero(SDValue Op) const {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
    return Cst->isNullValue();
  } else if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CstFP->isZero();
  } else {
    return false;
  }
}

SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
      ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far, it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  } else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}

/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
/// convert these pointers to a register index. Each register holds
/// 16 bytes (4 x 32-bit sub-registers), but we need to take into account the
/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
/// for indirect addressing.
SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
                                               unsigned StackWidth,
                                               SelectionDAG &DAG) const {
  unsigned SRLPad;
  switch (StackWidth) {
  case 1:
    SRLPad = 2;
    break;
  case 2:
    SRLPad = 3;
    break;
  case 4:
    SRLPad = 4;
    break;
  default: llvm_unreachable("Invalid stack width");
  }

  return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
                     DAG.getConstant(SRLPad, MVT::i32));
}

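// Map a flat element index onto the (channel, pointer increment) pair used
// for private-memory indirect addressing at the given stack width.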
void R600TargetLowering::getStackAddress(unsigned StackWidth,
                                         unsigned ElemIdx,
                                         unsigned &Channel,
                                         unsigned &PtrIncr) const {
  switch (StackWidth) {
  default:
  case 1:
    Channel = 0;
    if (ElemIdx > 0) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 2:
    Channel = ElemIdx % 2;
    if (ElemIdx == 2) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 4:
    Channel = ElemIdx;
    PtrIncr = 0;
    break;
  }
}

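// Custom store lowering. Global truncating stores (i8/i16) are expanded into
// a masked dword write (STORE_MSKOR): the value is shifted into its byte lane
// and paired with a mask for the hardware to apply. Other global stores get
// dword addresses; private-address stores fall through to indirect addressing.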
Tom Stellard75aadc22012-12-11 21:25:42 +00001297SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001298 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001299 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1300 SDValue Chain = Op.getOperand(0);
1301 SDValue Value = Op.getOperand(1);
1302 SDValue Ptr = Op.getOperand(2);
1303
Tom Stellard2ffc3302013-08-26 15:05:44 +00001304 SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
Tom Stellardfbab8272013-08-16 01:12:11 +00001305 if (Result.getNode()) {
1306 return Result;
1307 }
1308
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001309 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
1310 if (StoreNode->isTruncatingStore()) {
1311 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001312 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001313 EVT MemVT = StoreNode->getMemoryVT();
1314 SDValue MaskConstant;
1315 if (MemVT == MVT::i8) {
1316 MaskConstant = DAG.getConstant(0xFF, MVT::i32);
1317 } else {
1318 assert(MemVT == MVT::i16);
1319 MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
1320 }
1321 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
1322 DAG.getConstant(2, MVT::i32));
1323 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
1324 DAG.getConstant(0x00000003, VT));
1325 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1326 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
1327 DAG.getConstant(3, VT));
1328 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1329 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
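      // Worked example (illustrative, not from the source): a truncating i8
      // store of 0xAB to byte address 5 gives DWordAddr = 5 >> 2 = 1,
      // ByteIndex = 5 & 3 = 1 and Shift = 1 << 3 = 8 bits, so ShiftedValue =
      // 0xAB00 and Mask = 0xFF00; the MSKOR below leaves the dword's other
      // three bytes untouched.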
      // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
      // vector instead.
      SDValue Src[4] = {
        ShiftedValue,
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(0, MVT::i32),
        Mask
      };
      SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src);
      SDValue Args[3] = { Chain, Input, DWordAddr };
      return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
                                     Op->getVTList(), Args, MemVT,
                                     StoreNode->getMemOperand());
    } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
               Value.getValueType().bitsGE(MVT::i32)) {
      // Convert pointer from byte address to dword address.
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
                        DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
                                    Ptr, DAG.getConstant(2, MVT::i32)));

      if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
        llvm_unreachable("Truncated and indexed stores not supported yet");
      } else {
        Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
      }
      return Chain;
    }
  }

  EVT ValueVT = Value.getValueType();

  if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
  if (Ret.getNode()) {
    return Ret;
  }
  // Lowering for indirect addressing

  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering *>(
      getTargetMachine().getSubtargetImpl()->getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (ValueVT.isVector()) {
    unsigned NumElemVT = ValueVT.getVectorNumElements();
    EVT ElemVT = ValueVT.getVectorElementType();
    SmallVector<SDValue, 4> Stores(NumElemVT);

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in store");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
                                 Value, DAG.getConstant(i, MVT::i32));

      Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
                              Chain, Elem, Ptr,
                              DAG.getTargetConstant(Channel, MVT::i32));
    }
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
  } else {
    if (ValueVT == MVT::i8) {
      Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
    }
    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
                        DAG.getTargetConstant(0, MVT::i32)); // Channel
  }

  return Chain;
}

// Returns 512 + (kc_bank << 12).
static int
ConstantAddressBlock(unsigned AddressSpace) {
  switch (AddressSpace) {
  case AMDGPUAS::CONSTANT_BUFFER_0:
    return 512;
  case AMDGPUAS::CONSTANT_BUFFER_1:
    return 512 + 4096;
  case AMDGPUAS::CONSTANT_BUFFER_2:
    return 512 + 4096 * 2;
  case AMDGPUAS::CONSTANT_BUFFER_3:
    return 512 + 4096 * 3;
  case AMDGPUAS::CONSTANT_BUFFER_4:
    return 512 + 4096 * 4;
  case AMDGPUAS::CONSTANT_BUFFER_5:
    return 512 + 4096 * 5;
  case AMDGPUAS::CONSTANT_BUFFER_6:
    return 512 + 4096 * 6;
  case AMDGPUAS::CONSTANT_BUFFER_7:
    return 512 + 4096 * 7;
  case AMDGPUAS::CONSTANT_BUFFER_8:
    return 512 + 4096 * 8;
  case AMDGPUAS::CONSTANT_BUFFER_9:
    return 512 + 4096 * 9;
  case AMDGPUAS::CONSTANT_BUFFER_10:
    return 512 + 4096 * 10;
  case AMDGPUAS::CONSTANT_BUFFER_11:
    return 512 + 4096 * 11;
  case AMDGPUAS::CONSTANT_BUFFER_12:
    return 512 + 4096 * 12;
  case AMDGPUAS::CONSTANT_BUFFER_13:
    return 512 + 4096 * 13;
  case AMDGPUAS::CONSTANT_BUFFER_14:
    return 512 + 4096 * 14;
  case AMDGPUAS::CONSTANT_BUFFER_15:
    return 512 + 4096 * 15;
  default:
    return -1;
  }
}
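
// Spot check of the formula above (values derived from the cases): for
// kc_bank == 2, 512 + (2 << 12) == 512 + 8192 == 8704, which matches the
// CONSTANT_BUFFER_2 case returning 512 + 4096 * 2.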

SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
  if (Ret.getNode()) {
    SDValue Ops[2] = {
      Ret,
      Chain
    };
    return DAG.getMergeValues(Ops, DL);
  }

  // Lower loads of constant address space global variables.
  if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
      isa<GlobalVariable>(
          GetUnderlyingObject(LoadNode->getMemOperand()->getValue()))) {

    SDValue Ptr = DAG.getZExtOrTrunc(LoadNode->getBasePtr(), DL,
        getPointerTy(AMDGPUAS::PRIVATE_ADDRESS));
    Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
        DAG.getConstant(2, MVT::i32));
    return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op->getVTList(),
                       LoadNode->getChain(), Ptr,
                       DAG.getTargetConstant(0, MVT::i32), Op.getOperand(2));
  }

  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      ScalarizeVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
        isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want the constant position encoded with the following formula:
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // where const_index is the Ptr computed by LLVM using an alignment
        // of 16. Thus we add (512 + (kc_bank << 12)) * 16 + chan * 4 here
        // and then divide by 4 at the ISel step.
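        // Worked example under that formula (illustrative values): with
        // kc_bank == 0, chan == 1 and Ptr == 0, NewPtr below is
        // 4 * 1 + 512 * 16 == 8196; dividing by 4 at ISel gives
        // 2049 == ((512 + 0 + 0) << 2) + 1, as the encoding requires.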
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT,
                           makeArrayRef(Slots, NumElements));
    } else {
      // A non-constant Ptr can't be folded; keep it as a v4f32 load.
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // For most operations, returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to
  // the buffer. However, SEXT loads from other address spaces are not
  // supported, so we need to expand them here.
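  // The expansion emulates the sign-extending load with a plain extending
  // load plus a shift pair. Worked example (illustrative): for an i8 loaded
  // into an i32, ShiftAmount is 24, so a loaded byte 0x80 becomes 0x80000000
  // after the SHL and 0xFFFFFF80 (i.e. -128) after the SRA, which is the
  // correctly sign-extended value.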
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    SDValue ShiftAmount =
        DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                     LoadNode->getPointerInfo(), MemVT,
                                     LoadNode->isVolatile(),
                                     LoadNode->isNonTemporal(),
                                     LoadNode->isInvariant(),
                                     LoadNode->getAlignment());
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);

    SDValue MergedValues[2] = { Sra, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering *>(
      getTargetMachine().getSubtargetImpl()->getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, MVT::i32),
                             Op.getOperand(2));
    }
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2] = {
    LoweredLoad,
    Chain
  };

  return DAG.getMergeValues(Ops, DL);
}

SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Cond = Op.getOperand(1);
  SDValue Jump = Op.getOperand(2);

  return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
                     Chain, Jump, Cond);
}

/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
SDValue R600TargetLowering::LowerFormalArguments(
    SDValue Chain,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    SDLoc DL, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();

  SmallVector<ISD::InputArg, 8> LocalIns;

  getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);

  AnalyzeFormalArguments(CCInfo, LocalIns);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    const ISD::InputArg &In = Ins[i];
    EVT VT = In.VT;
    EVT MemVT = VA.getLocVT();
    if (!VT.isVector() && MemVT.isVector()) {
      // Get load source type if scalarized.
      MemVT = MemVT.getVectorElementType();
    }

    if (MFI->getShaderType() != ShaderType::COMPUTE) {
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
    // being invalid. Somehow this seems to work with i64 arguments, but breaks
    // for <1 x i64>.

    // The first 36 bytes of the input buffer contain information about
    // thread group and global sizes.
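    // A worked offset (illustrative): the first explicit kernel argument is
    // assigned LocMemOffset 0, so it is loaded from byte offset 36 + 0 == 36
    // of CONSTANT_BUFFER_0; an i32 argument following it would start at
    // offset 40.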
    ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
    if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
      // FIXME: This should really check the extload type, but the handling of
      // extload vector parameters seems to be broken.

      // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
      Ext = ISD::SEXTLOAD;
    }

    // Compute the offset from the value.
    // XXX - I think PartOffset should give you this, but it seems to give the
    // size of the register which isn't useful.

    unsigned ValBase = ArgLocs[In.OrigArgIndex].getLocMemOffset();
    unsigned PartOffset = VA.getLocMemOffset();
    unsigned Offset = 36 + VA.getLocMemOffset();

    MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
    SDValue Arg = DAG.getLoad(ISD::UNINDEXED, Ext, VT, DL, Chain,
                              DAG.getConstant(Offset, MVT::i32),
                              DAG.getUNDEF(MVT::i32),
                              PtrInfo,
                              MemVT, false, true, true, 4);

    // 4 is the preferred alignment for the CONSTANT memory space.
    InVals.push_back(Arg);
    MFI->ABIArgOffset = Offset + MemVT.getStoreSize();
  }
  return Chain;
}

EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
  if (!VT.isVector())
    return MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}

static SDValue CompactSwizzlableVector(
    SelectionDAG &DAG, SDValue VectorEntry,
    DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      // We mask the write here to teach later passes that the ith element of
      // this vector is undef. Thus we can use it to reduce 128-bit register
      // usage, break false dependencies and additionally make assembly easier
      // to read.
      RemapSwizzle[i] = 7; // SEL_MASK_WRITE
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      continue;
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec);
}

static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
                                DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };
  bool isUnmovable[4] = { false, false, false, false };
  for (unsigned i = 0; i < 4; i++) {
    RemapSwizzle[i] = i;
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
          ->getZExtValue();
      if (i == Idx)
        isUnmovable[Idx] = true;
    }
  }

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
          ->getZExtValue();
      if (isUnmovable[Idx])
        continue;
      // Swap i and Idx
      std::swap(NewBldVec[Idx], NewBldVec[i]);
      std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
      break;
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec);
}

SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[4],
                                            SelectionDAG &DAG) const {
  assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
  // Old -> New swizzle values
  DenseMap<unsigned, unsigned> SwizzleRemap;

  BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
  for (unsigned i = 0; i < 4; i++) {
    unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
    if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
      Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
  }

  SwizzleRemap.clear();
  BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
  for (unsigned i = 0; i < 4; i++) {
    unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
    if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
      Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
  }

  return BuildVector;
}

//===----------------------------------------------------------------------===//
// Custom DAG Optimizations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
  case ISD::FP_ROUND: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
      return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
                         Arg.getOperand(0));
    }
    break;
  }

  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
  // (i32 select_cc f32, f32, -1, 0 cc)
  //
  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
  // this to one of the SET*_DX10 instructions.
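  //
  // For instance (a sketch, not taken from the source), IR of the shape
  //   %s = select (fcmp olt float %a, %b), float 1.0, float 0.0
  //   %i = fptosi float (fneg %s) to i32
  // matches the pattern and collapses to a single integer select_cc with
  // true/false values -1/0, i.e. a SETLT_DX10.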
  case ISD::FP_TO_SINT: {
    SDValue FNeg = N->getOperand(0);
    if (FNeg.getOpcode() != ISD::FNEG) {
      return SDValue();
    }
    SDValue SelectCC = FNeg.getOperand(0);
    if (SelectCC.getOpcode() != ISD::SELECT_CC ||
        SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
        SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
        !isHWTrueValue(SelectCC.getOperand(2)) ||
        !isHWFalseValue(SelectCC.getOperand(3))) {
      return SDValue();
    }

    return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
                       SelectCC.getOperand(0), // LHS
                       SelectCC.getOperand(1), // RHS
                       DAG.getConstant(-1, MVT::i32), // True
                       DAG.getConstant(0, MVT::i32), // False
                       SelectCC.getOperand(4)); // CC

    break;
  }

  // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
  // => build_vector elt0, ... , NewEltIdx, ... , eltN
  case ISD::INSERT_VECTOR_ELT: {
    SDValue InVec = N->getOperand(0);
    SDValue InVal = N->getOperand(1);
    SDValue EltNo = N->getOperand(2);
    SDLoc dl(N);

    // If the inserted element is an UNDEF, just use the input vector.
    if (InVal.getOpcode() == ISD::UNDEF)
      return InVec;

    EVT VT = InVec.getValueType();

    // If we can't generate a legal BUILD_VECTOR, exit
    if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
      return SDValue();

    // Check that we know which element is being inserted
    if (!isa<ConstantSDNode>(EltNo))
      return SDValue();
    unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

    // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
    // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
    // vector elements.
    SmallVector<SDValue, 8> Ops;
    if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
      Ops.append(InVec.getNode()->op_begin(),
                 InVec.getNode()->op_end());
    } else if (InVec.getOpcode() == ISD::UNDEF) {
      unsigned NElts = VT.getVectorNumElements();
      Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
    } else {
      return SDValue();
    }

    // Insert the element
    if (Elt < Ops.size()) {
      // All the operands of BUILD_VECTOR must have the same type;
      // we enforce that here.
      EVT OpVT = Ops[0].getValueType();
      if (InVal.getValueType() != OpVT)
        InVal = OpVT.bitsGT(InVal.getValueType()) ?
            DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
            DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
      Ops[Elt] = InVal;
    }

    // Return the new vector
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
  }

  // Extract_vec (Build_vector) generated by custom lowering
  // also needs to be custom combined.
  case ISD::EXTRACT_VECTOR_ELT: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return Arg->getOperand(Element);
      }
    }
    if (Arg.getOpcode() == ISD::BITCAST &&
        Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
                           Arg->getOperand(0).getOperand(Element));
      }
    }
  }

  case ISD::SELECT_CC: {
    // Try common optimizations
    SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
    if (Ret.getNode())
      return Ret;

    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
    // selectcc x, y, a, b, inv(cc)
    //
    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
    // selectcc x, y, a, b, cc
    SDValue LHS = N->getOperand(0);
    if (LHS.getOpcode() != ISD::SELECT_CC) {
      return SDValue();
    }

    SDValue RHS = N->getOperand(1);
    SDValue True = N->getOperand(2);
    SDValue False = N->getOperand(3);
    ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();

    if (LHS.getOperand(2).getNode() != True.getNode() ||
        LHS.getOperand(3).getNode() != False.getNode() ||
        RHS.getNode() != False.getNode()) {
      return SDValue();
    }

    switch (NCC) {
    default: return SDValue();
    case ISD::SETNE: return LHS;
    case ISD::SETEQ: {
      ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
      LHSCC = ISD::getSetCCInverse(LHSCC,
          LHS.getOperand(0).getValueType().isInteger());
      if (DCI.isBeforeLegalizeOps() ||
          isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
        return DAG.getSelectCC(SDLoc(N),
                               LHS.getOperand(0),
                               LHS.getOperand(1),
                               LHS.getOperand(2),
                               LHS.getOperand(3),
                               LHSCC);
      break;
    }
    }
    return SDValue();
  }

  case AMDGPUISD::EXPORT: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    SDValue NewArgs[8] = {
      N->getOperand(0), // Chain
      SDValue(),
      N->getOperand(2), // ArrayBase
      N->getOperand(3), // Type
      N->getOperand(4), // SWZ_X
      N->getOperand(5), // SWZ_Y
      N->getOperand(6), // SWZ_Z
      N->getOperand(7)  // SWZ_W
    };
    SDLoc DL(N);
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
    return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
  }
  case AMDGPUISD::TEXTURE_FETCH: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    SDValue NewArgs[19] = {
      N->getOperand(0),
      N->getOperand(1),
      N->getOperand(2),
      N->getOperand(3),
      N->getOperand(4),
      N->getOperand(5),
      N->getOperand(6),
      N->getOperand(7),
      N->getOperand(8),
      N->getOperand(9),
      N->getOperand(10),
      N->getOperand(11),
      N->getOperand(12),
      N->getOperand(13),
      N->getOperand(14),
      N->getOperand(15),
      N->getOperand(16),
      N->getOperand(17),
      N->getOperand(18),
    };
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
    return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
                       NewArgs);
  }
  }

  return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
}

static bool
FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
            SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
  if (!Src.isMachineOpcode())
    return false;
  switch (Src.getMachineOpcode()) {
  case AMDGPU::FNEG_R600:
    if (!Neg.getNode())
      return false;
    Src = Src.getOperand(0);
    Neg = DAG.getTargetConstant(1, MVT::i32);
    return true;
  case AMDGPU::FABS_R600:
    if (!Abs.getNode())
      return false;
    Src = Src.getOperand(0);
    Abs = DAG.getTargetConstant(1, MVT::i32);
    return true;
  case AMDGPU::CONST_COPY: {
    unsigned Opcode = ParentNode->getMachineOpcode();
    bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;

    if (!Sel.getNode())
      return false;

    SDValue CstOffset = Src.getOperand(0);
    if (ParentNode->getValueType(0).isVector())
      return false;

    // Gather constant values.
    int SrcIndices[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
    };
    std::vector<unsigned> Consts;
    for (int OtherSrcIdx : SrcIndices) {
      int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
      if (OtherSrcIdx < 0 || OtherSelIdx < 0)
        continue;
      if (HasDst) {
        OtherSrcIdx--;
        OtherSelIdx--;
      }
      if (RegisterSDNode *Reg =
              dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
        if (Reg->getReg() == AMDGPU::ALU_CONST) {
          ConstantSDNode *Cst
              = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
          Consts.push_back(Cst->getZExtValue());
        }
      }
    }

    ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
    Consts.push_back(Cst->getZExtValue());
    if (!TII->fitsConstReadLimitations(Consts)) {
      return false;
    }

    Sel = CstOffset;
    Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
    return true;
  }
  case AMDGPU::MOV_IMM_I32:
  case AMDGPU::MOV_IMM_F32: {
    unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
    uint64_t ImmValue = 0;

    if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
      ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
      float FloatValue = FPC->getValueAPF().convertToFloat();
      if (FloatValue == 0.0) {
        ImmReg = AMDGPU::ZERO;
      } else if (FloatValue == 0.5) {
        ImmReg = AMDGPU::HALF;
      } else if (FloatValue == 1.0) {
        ImmReg = AMDGPU::ONE;
      } else {
        ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
      }
    } else {
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
      uint64_t Value = C->getZExtValue();
      if (Value == 0) {
        ImmReg = AMDGPU::ZERO;
      } else if (Value == 1) {
        ImmReg = AMDGPU::ONE_INT;
      } else {
        ImmValue = Value;
      }
    }

    // Check that we aren't already using an immediate.
    // XXX: It's possible for an instruction to have more than one
    // immediate operand, but this is not supported yet.
    if (ImmReg == AMDGPU::ALU_LITERAL_X) {
      if (!Imm.getNode())
        return false;
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
      assert(C);
      if (C->getZExtValue())
        return false;
      Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
    }
    Src = DAG.getRegister(ImmReg, MVT::i32);
    return true;
  }
  default:
    return false;
  }
}

/// \brief Fold the instructions after selecting them
SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
                                            SelectionDAG &DAG) const {
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
  if (!Node->isMachineOpcode())
    return Node;
  unsigned Opcode = Node->getMachineOpcode();
  SDValue FakeOp;

  std::vector<SDValue> Ops;
  for (const SDUse &I : Node->ops())
    Ops.push_back(I);

  if (Opcode == AMDGPU::DOT_4) {
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
    };
    for (unsigned i = 0; i < 8; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue &Abs = Ops[AbsIdx[i] - 1];
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      if (HasDst)
        SelIdx--;
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::REG_SEQUENCE) {
    for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
      SDValue &Src = Ops[i];
      if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::CLAMP_R600) {
    SDValue Src = Node->getOperand(0);
    if (!Src.isMachineOpcode() ||
        !TII->hasInstrModifiers(Src.getMachineOpcode()))
      return Node;
    int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
        AMDGPU::OpName::clamp);
    if (ClampIdx < 0)
      return Node;
    std::vector<SDValue> Ops;
    unsigned NumOp = Src.getNumOperands();
    for (unsigned i = 0; i < NumOp; ++i)
      Ops.push_back(Src.getOperand(i));
    Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
    return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
                              Node->getVTList(), Ops);
  } else {
    if (!TII->hasInstrModifiers(Opcode))
      return Node;
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
      -1
    };
    for (unsigned i = 0; i < 3; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue FakeAbs;
      SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
      if (HasDst) {
        SelIdx--;
        ImmIdx--;
      }
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      SDValue &Imm = Ops[ImmIdx];
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  }

  return Node;
}