blob: fb7514e26da8ec99f58fdfe13888a3398fc78bcd [file] [log] [blame]
Tom Stellard75aadc22012-12-11 21:25:42 +00001//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Custom DAG lowering for R600
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000016#include "AMDGPUFrameLowering.h"
Matt Arsenaultc791f392014-06-23 18:00:31 +000017#include "AMDGPUIntrinsicInfo.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000018#include "AMDGPUSubtarget.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000019#include "R600Defines.h"
20#include "R600InstrInfo.h"
21#include "R600MachineFunctionInfo.h"
Tom Stellard067c8152014-07-21 14:01:14 +000022#include "llvm/Analysis/ValueTracking.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000023#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000024#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000025#include "llvm/CodeGen/MachineInstrBuilder.h"
26#include "llvm/CodeGen/MachineRegisterInfo.h"
27#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000028#include "llvm/IR/Argument.h"
29#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000030
31using namespace llvm;
32
33R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000034 AMDGPUTargetLowering(TM),
35 Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000036 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
37 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
38 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
39 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000040 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
41 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
42
Tom Stellard75aadc22012-12-11 21:25:42 +000043 computeRegisterProperties();
44
Tom Stellard0351ea22013-09-28 02:50:50 +000045 // Set condition code actions
46 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
47 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000048 setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000049 setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000050 setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
51 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000052 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
53 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
54 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
55 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000056 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
57 setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
58
59 setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
60 setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
61 setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
62 setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
63
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000064 setOperationAction(ISD::FCOS, MVT::f32, Custom);
65 setOperationAction(ISD::FSIN, MVT::f32, Custom);
66
Tom Stellard75aadc22012-12-11 21:25:42 +000067 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000068 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000069
Tom Stellard492ebea2013-03-08 15:37:07 +000070 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
71 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Matt Arsenault1d555c42014-06-23 18:00:55 +000072 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000073
74 setOperationAction(ISD::FSUB, MVT::f32, Expand);
75
76 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
77 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
78 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000079
Tom Stellard75aadc22012-12-11 21:25:42 +000080 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
81 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
82
Tom Stellarde8f9f282013-03-08 15:37:05 +000083 setOperationAction(ISD::SETCC, MVT::i32, Expand);
84 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000085 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
Jan Vesely2cb62ce2014-07-10 22:40:21 +000086 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
87 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000088
Tom Stellard53f2f902013-09-05 18:38:03 +000089 setOperationAction(ISD::SELECT, MVT::i32, Expand);
90 setOperationAction(ISD::SELECT, MVT::f32, Expand);
91 setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
Tom Stellard53f2f902013-09-05 18:38:03 +000092 setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000093
Matt Arsenault4e466652014-04-16 01:41:30 +000094 // Expand sign extension of vectors
95 if (!Subtarget->hasBFE())
96 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
97
98 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
99 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);
100
101 if (!Subtarget->hasBFE())
102 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
103 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
104 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);
105
106 if (!Subtarget->hasBFE())
107 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
108 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
109 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);
110
111 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
112 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
113 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);
114
115 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
116
117
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000118 // Legalize loads and stores to the private address space.
119 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000120 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000121 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000122
123 // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
124 // spaces, so it is custom lowered to handle those where it isn't.
Matt Arsenault2a495972014-11-23 02:57:54 +0000125 setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
Tom Stellard1e803092013-07-23 01:48:18 +0000126 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
127 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
Matt Arsenault2a495972014-11-23 02:57:54 +0000128
129 setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
Tom Stellard1e803092013-07-23 01:48:18 +0000130 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
131 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
Matt Arsenault2a495972014-11-23 02:57:54 +0000132
133 setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000134 setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
135 setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
136
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000137 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000138 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000139 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000140 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
Tom Stellardd3ee8c12013-08-16 01:12:06 +0000141 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
142 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000143
Tom Stellard365366f2013-01-23 02:09:06 +0000144 setOperationAction(ISD::LOAD, MVT::i32, Custom);
145 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000146 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
147
Tom Stellard880a80a2014-06-17 16:53:14 +0000148 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
149 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
150 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
151 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
152
153 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
154 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
155 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
156 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
157
Tom Stellard75aadc22012-12-11 21:25:42 +0000158 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +0000159 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +0000160 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +0000161 setTargetDAGCombine(ISD::SELECT_CC);
Quentin Colombete2e05482013-07-30 00:27:16 +0000162 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000163
Matt Arsenaultb8b51532014-06-23 18:00:38 +0000164 setOperationAction(ISD::SUB, MVT::i64, Expand);
165
Tom Stellard5f337882014-04-29 23:12:43 +0000166 // These should be replaced by UDVIREM, but it does not happen automatically
167 // during Type Legalization
168 setOperationAction(ISD::UDIV, MVT::i64, Custom);
169 setOperationAction(ISD::UREM, MVT::i64, Custom);
Jan Vesely343cd6f02014-06-22 21:43:01 +0000170 setOperationAction(ISD::SDIV, MVT::i64, Custom);
171 setOperationAction(ISD::SREM, MVT::i64, Custom);
Tom Stellard5f337882014-04-29 23:12:43 +0000172
Jan Vesely25f36272014-06-18 12:27:13 +0000173 // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
174 // to be Legal/Custom in order to avoid library calls.
175 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
Jan Vesely900ff2e2014-06-18 12:27:15 +0000176 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
Jan Veselyecf51332014-06-18 12:27:17 +0000177 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
Jan Vesely25f36272014-06-18 12:27:13 +0000178
Michel Danzer49812b52013-07-10 16:37:07 +0000179 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
180
Matt Arsenaultc4d3d3a2014-06-23 18:00:49 +0000181 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
182 for (MVT VT : ScalarIntVTs) {
183 setOperationAction(ISD::ADDC, VT, Expand);
184 setOperationAction(ISD::SUBC, VT, Expand);
185 setOperationAction(ISD::ADDE, VT, Expand);
186 setOperationAction(ISD::SUBE, VT, Expand);
187 }
188
Tom Stellardfc455472013-08-12 22:33:21 +0000189 setSchedulingPreference(Sched::Source);
Tom Stellard75aadc22012-12-11 21:25:42 +0000190}
191
/// Expand pseudo-instructions that need a custom MachineInstr-level inserter.
///
/// Called by the generic SelectionDAG scheduler for any instruction flagged
/// usesCustomInserter. Emits replacement machine instruction(s) into \p BB
/// before \p MI, then erases \p MI (except for the early-return paths, which
/// leave \p MI in place). Returns the block to continue emission in.
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(MF->getSubtarget().getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // Replace LDS_*_RET instruction that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI->getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
      // LDS_1A2D support and remove this special case.
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()) ||
          MI->getOpcode() == AMDGPU::LDS_CMPST_RET)
        return BB;

      // Rebuild as the NORET form, copying every operand except the dead
      // destination (operand 0).
      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  // CLAMP/FABS/FNEG pseudos become a plain MOV carrying the corresponding
  // R600 modifier flag on the source operand.
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    // Mark the instruction defining the masked register with MO_FLAG_MASK
    // instead of emitting anything for the MASK_WRITE pseudo itself.
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    // FP immediates are moved via their raw bit pattern.
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    // A read from the constant file: MOV from ALU_CONST with the constant
    // selector encoded in src0_sel.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
                       MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    // If the next instruction is the function's RETURN, fold the
    // end-of-program bit into this export-style write.
    unsigned EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    // Texture sample with user-supplied derivatives: load the H and V
    // gradients into temporaries, then issue the gradient sample.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    // Adjust the source swizzle and coordinate-type bits per texture target.
    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    // The sample reads T0/T1 implicitly so the gradient loads stay live.
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    // Same as TXD but issues the shadow-comparison gradient sample
    // (TEX_SAMPLE_C_G) instead of TEX_SAMPLE_G.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    // Conditional branch: materialize the predicate with PRED_X (pushing the
    // stack via MO_FLAG_PUSH), then jump on PREDICATE_BIT.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    // Integer variant of the conditional branch above.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO_INT)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if its not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // CF instruction encoding differs between Evergreen (84) and R600 (40).
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  MI->eraseFromParent();
  return BB;
}
571
572//===----------------------------------------------------------------------===//
573// Custom DAG Lowering Operations
574//===----------------------------------------------------------------------===//
575
Tom Stellard75aadc22012-12-11 21:25:42 +0000576SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000577 MachineFunction &MF = DAG.getMachineFunction();
578 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard75aadc22012-12-11 21:25:42 +0000579 switch (Op.getOpcode()) {
580 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Tom Stellard880a80a2014-06-17 16:53:14 +0000581 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
582 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
Jan Vesely25f36272014-06-18 12:27:13 +0000583 case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
Jan Veselyecf51332014-06-18 12:27:17 +0000584 case ISD::SRA_PARTS:
Jan Vesely900ff2e2014-06-18 12:27:15 +0000585 case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000586 case ISD::FCOS:
587 case ISD::FSIN: return LowerTrig(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000588 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000589 case ISD::STORE: return LowerSTORE(Op, DAG);
Matt Arsenaultd2c9e082014-07-07 18:34:45 +0000590 case ISD::LOAD: {
591 SDValue Result = LowerLOAD(Op, DAG);
592 assert((!Result.getNode() ||
593 Result.getNode()->getNumValues() == 2) &&
594 "Load should return a value and a chain");
595 return Result;
596 }
597
Matt Arsenault1d555c42014-06-23 18:00:55 +0000598 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000599 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000600 case ISD::INTRINSIC_VOID: {
601 SDValue Chain = Op.getOperand(0);
602 unsigned IntrinsicID =
603 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
604 switch (IntrinsicID) {
605 case AMDGPUIntrinsic::AMDGPU_store_output: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000606 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
607 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000608 MFI->LiveOuts.push_back(Reg);
Andrew Trickef9de2a2013-05-25 02:42:55 +0000609 return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
Tom Stellard75aadc22012-12-11 21:25:42 +0000610 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000611 case AMDGPUIntrinsic::R600_store_swizzle: {
612 const SDValue Args[8] = {
613 Chain,
614 Op.getOperand(2), // Export Value
615 Op.getOperand(3), // ArrayBase
616 Op.getOperand(4), // Type
617 DAG.getConstant(0, MVT::i32), // SWZ_X
618 DAG.getConstant(1, MVT::i32), // SWZ_Y
619 DAG.getConstant(2, MVT::i32), // SWZ_Z
620 DAG.getConstant(3, MVT::i32) // SWZ_W
621 };
Craig Topper48d114b2014-04-26 18:35:24 +0000622 return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(), Args);
Tom Stellard75aadc22012-12-11 21:25:42 +0000623 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000624
Tom Stellard75aadc22012-12-11 21:25:42 +0000625 // default for switch(IntrinsicID)
626 default: break;
627 }
628 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
629 break;
630 }
631 case ISD::INTRINSIC_WO_CHAIN: {
632 unsigned IntrinsicID =
633 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
634 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000635 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000636 switch(IntrinsicID) {
637 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000638 case AMDGPUIntrinsic::R600_load_input: {
639 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
640 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
641 MachineFunction &MF = DAG.getMachineFunction();
642 MachineRegisterInfo &MRI = MF.getRegInfo();
643 MRI.addLiveIn(Reg);
644 return DAG.getCopyFromReg(DAG.getEntryNode(),
645 SDLoc(DAG.getEntryNode()), Reg, VT);
646 }
647
648 case AMDGPUIntrinsic::R600_interp_input: {
649 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
650 int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
651 MachineSDNode *interp;
652 if (ijb < 0) {
653 const MachineFunction &MF = DAG.getMachineFunction();
Eric Christopherd9134482014-08-04 21:25:23 +0000654 const R600InstrInfo *TII = static_cast<const R600InstrInfo *>(
Eric Christopherfc6de422014-08-05 02:39:49 +0000655 MF.getSubtarget().getInstrInfo());
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000656 interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
657 MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
658 return DAG.getTargetExtractSubreg(
659 TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
660 DL, MVT::f32, SDValue(interp, 0));
661 }
662 MachineFunction &MF = DAG.getMachineFunction();
663 MachineRegisterInfo &MRI = MF.getRegInfo();
664 unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
665 unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
666 MRI.addLiveIn(RegisterI);
667 MRI.addLiveIn(RegisterJ);
668 SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
669 SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
670 SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
671 SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);
672
673 if (slot % 4 < 2)
674 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
675 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
676 RegisterJNode, RegisterINode);
677 else
678 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
679 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
680 RegisterJNode, RegisterINode);
681 return SDValue(interp, slot % 2);
682 }
Vincent Lejeunef143af32013-11-11 22:10:24 +0000683 case AMDGPUIntrinsic::R600_interp_xy:
684 case AMDGPUIntrinsic::R600_interp_zw: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000685 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Tom Stellard41afe6a2013-02-05 17:09:14 +0000686 MachineSDNode *interp;
Vincent Lejeunef143af32013-11-11 22:10:24 +0000687 SDValue RegisterINode = Op.getOperand(2);
688 SDValue RegisterJNode = Op.getOperand(3);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000689
Vincent Lejeunef143af32013-11-11 22:10:24 +0000690 if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
Tom Stellard41afe6a2013-02-05 17:09:14 +0000691 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
Vincent Lejeunef143af32013-11-11 22:10:24 +0000692 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000693 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000694 else
695 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
Vincent Lejeunef143af32013-11-11 22:10:24 +0000696 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000697 RegisterJNode, RegisterINode);
Vincent Lejeunef143af32013-11-11 22:10:24 +0000698 return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
699 SDValue(interp, 0), SDValue(interp, 1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000700 }
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000701 case AMDGPUIntrinsic::R600_tex:
702 case AMDGPUIntrinsic::R600_texc:
703 case AMDGPUIntrinsic::R600_txl:
704 case AMDGPUIntrinsic::R600_txlc:
705 case AMDGPUIntrinsic::R600_txb:
706 case AMDGPUIntrinsic::R600_txbc:
707 case AMDGPUIntrinsic::R600_txf:
708 case AMDGPUIntrinsic::R600_txq:
709 case AMDGPUIntrinsic::R600_ddx:
Vincent Lejeune6df39432013-10-02 16:00:33 +0000710 case AMDGPUIntrinsic::R600_ddy:
711 case AMDGPUIntrinsic::R600_ldptr: {
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000712 unsigned TextureOp;
713 switch (IntrinsicID) {
714 case AMDGPUIntrinsic::R600_tex:
715 TextureOp = 0;
716 break;
717 case AMDGPUIntrinsic::R600_texc:
718 TextureOp = 1;
719 break;
720 case AMDGPUIntrinsic::R600_txl:
721 TextureOp = 2;
722 break;
723 case AMDGPUIntrinsic::R600_txlc:
724 TextureOp = 3;
725 break;
726 case AMDGPUIntrinsic::R600_txb:
727 TextureOp = 4;
728 break;
729 case AMDGPUIntrinsic::R600_txbc:
730 TextureOp = 5;
731 break;
732 case AMDGPUIntrinsic::R600_txf:
733 TextureOp = 6;
734 break;
735 case AMDGPUIntrinsic::R600_txq:
736 TextureOp = 7;
737 break;
738 case AMDGPUIntrinsic::R600_ddx:
739 TextureOp = 8;
740 break;
741 case AMDGPUIntrinsic::R600_ddy:
742 TextureOp = 9;
743 break;
Vincent Lejeune6df39432013-10-02 16:00:33 +0000744 case AMDGPUIntrinsic::R600_ldptr:
745 TextureOp = 10;
746 break;
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000747 default:
748 llvm_unreachable("Unknow Texture Operation");
749 }
750
751 SDValue TexArgs[19] = {
752 DAG.getConstant(TextureOp, MVT::i32),
753 Op.getOperand(1),
754 DAG.getConstant(0, MVT::i32),
755 DAG.getConstant(1, MVT::i32),
756 DAG.getConstant(2, MVT::i32),
757 DAG.getConstant(3, MVT::i32),
758 Op.getOperand(2),
759 Op.getOperand(3),
760 Op.getOperand(4),
761 DAG.getConstant(0, MVT::i32),
762 DAG.getConstant(1, MVT::i32),
763 DAG.getConstant(2, MVT::i32),
764 DAG.getConstant(3, MVT::i32),
765 Op.getOperand(5),
766 Op.getOperand(6),
767 Op.getOperand(7),
768 Op.getOperand(8),
769 Op.getOperand(9),
770 Op.getOperand(10)
771 };
Craig Topper48d114b2014-04-26 18:35:24 +0000772 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000773 }
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000774 case AMDGPUIntrinsic::AMDGPU_dp4: {
775 SDValue Args[8] = {
776 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
777 DAG.getConstant(0, MVT::i32)),
778 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
779 DAG.getConstant(0, MVT::i32)),
780 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
781 DAG.getConstant(1, MVT::i32)),
782 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
783 DAG.getConstant(1, MVT::i32)),
784 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
785 DAG.getConstant(2, MVT::i32)),
786 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
787 DAG.getConstant(2, MVT::i32)),
788 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
789 DAG.getConstant(3, MVT::i32)),
790 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
791 DAG.getConstant(3, MVT::i32))
792 };
Craig Topper48d114b2014-04-26 18:35:24 +0000793 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000794 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000795
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000796 case Intrinsic::r600_read_ngroups_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000797 return LowerImplicitParameter(DAG, VT, DL, 0);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000798 case Intrinsic::r600_read_ngroups_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000799 return LowerImplicitParameter(DAG, VT, DL, 1);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000800 case Intrinsic::r600_read_ngroups_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000801 return LowerImplicitParameter(DAG, VT, DL, 2);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000802 case Intrinsic::r600_read_global_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000803 return LowerImplicitParameter(DAG, VT, DL, 3);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000804 case Intrinsic::r600_read_global_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000805 return LowerImplicitParameter(DAG, VT, DL, 4);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000806 case Intrinsic::r600_read_global_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000807 return LowerImplicitParameter(DAG, VT, DL, 5);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000808 case Intrinsic::r600_read_local_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000809 return LowerImplicitParameter(DAG, VT, DL, 6);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000810 case Intrinsic::r600_read_local_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000811 return LowerImplicitParameter(DAG, VT, DL, 7);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000812 case Intrinsic::r600_read_local_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000813 return LowerImplicitParameter(DAG, VT, DL, 8);
814
Jan Veselye5121f32014-10-14 20:05:26 +0000815 case Intrinsic::AMDGPU_read_workdim:
816 return LowerImplicitParameter(DAG, VT, DL, MFI->ABIArgOffset / 4);
817
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000818 case Intrinsic::r600_read_tgid_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000819 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
820 AMDGPU::T1_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000821 case Intrinsic::r600_read_tgid_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000822 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
823 AMDGPU::T1_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000824 case Intrinsic::r600_read_tgid_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000825 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
826 AMDGPU::T1_Z, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000827 case Intrinsic::r600_read_tidig_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000828 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
829 AMDGPU::T0_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000830 case Intrinsic::r600_read_tidig_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000831 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
832 AMDGPU::T0_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000833 case Intrinsic::r600_read_tidig_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000834 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
835 AMDGPU::T0_Z, VT);
Matt Arsenault257d48d2014-06-24 22:13:39 +0000836 case Intrinsic::AMDGPU_rsq:
837 // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
838 return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000839 }
840 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
841 break;
842 }
843 } // end switch(Op.getOpcode())
844 return SDValue();
845}
846
847void R600TargetLowering::ReplaceNodeResults(SDNode *N,
848 SmallVectorImpl<SDValue> &Results,
849 SelectionDAG &DAG) const {
850 switch (N->getOpcode()) {
Matt Arsenaultd125d742014-03-27 17:23:24 +0000851 default:
852 AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
853 return;
Jan Vesely2cb62ce2014-07-10 22:40:21 +0000854 case ISD::FP_TO_UINT:
855 if (N->getValueType(0) == MVT::i1) {
856 Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
857 return;
858 }
859 // Fall-through. Since we don't care about out of bounds values
860 // we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint
861 // considers some extra cases which are not necessary here.
862 case ISD::FP_TO_SINT: {
863 SDValue Result;
864 if (expandFP_TO_SINT(N, Result, DAG))
865 Results.push_back(Result);
Tom Stellard365366f2013-01-23 02:09:06 +0000866 return;
Jan Vesely2cb62ce2014-07-10 22:40:21 +0000867 }
Jan Vesely343cd6f02014-06-22 21:43:01 +0000868 case ISD::UDIV: {
869 SDValue Op = SDValue(N, 0);
870 SDLoc DL(Op);
871 EVT VT = Op.getValueType();
872 SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT),
873 N->getOperand(0), N->getOperand(1));
874 Results.push_back(UDIVREM);
875 break;
876 }
877 case ISD::UREM: {
878 SDValue Op = SDValue(N, 0);
879 SDLoc DL(Op);
880 EVT VT = Op.getValueType();
881 SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT),
882 N->getOperand(0), N->getOperand(1));
883 Results.push_back(UDIVREM.getValue(1));
884 break;
885 }
886 case ISD::SDIV: {
887 SDValue Op = SDValue(N, 0);
888 SDLoc DL(Op);
889 EVT VT = Op.getValueType();
890 SDValue SDIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(VT, VT),
891 N->getOperand(0), N->getOperand(1));
892 Results.push_back(SDIVREM);
893 break;
894 }
895 case ISD::SREM: {
896 SDValue Op = SDValue(N, 0);
897 SDLoc DL(Op);
898 EVT VT = Op.getValueType();
899 SDValue SDIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(VT, VT),
900 N->getOperand(0), N->getOperand(1));
901 Results.push_back(SDIVREM.getValue(1));
902 break;
903 }
904 case ISD::SDIVREM: {
905 SDValue Op = SDValue(N, 1);
906 SDValue RES = LowerSDIVREM(Op, DAG);
907 Results.push_back(RES);
908 Results.push_back(RES.getValue(1));
909 break;
910 }
911 case ISD::UDIVREM: {
912 SDValue Op = SDValue(N, 0);
Tom Stellardbf69d762014-11-15 01:07:53 +0000913 LowerUDIVREM64(Op, DAG, Results);
Jan Vesely343cd6f02014-06-22 21:43:01 +0000914 break;
915 }
916 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000917}
918
Tom Stellard880a80a2014-06-17 16:53:14 +0000919SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
920 SDValue Vector) const {
921
922 SDLoc DL(Vector);
923 EVT VecVT = Vector.getValueType();
924 EVT EltVT = VecVT.getVectorElementType();
925 SmallVector<SDValue, 8> Args;
926
927 for (unsigned i = 0, e = VecVT.getVectorNumElements();
928 i != e; ++i) {
929 Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
930 Vector, DAG.getConstant(i, getVectorIdxTy())));
931 }
932
933 return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
934}
935
936SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
937 SelectionDAG &DAG) const {
938
939 SDLoc DL(Op);
940 SDValue Vector = Op.getOperand(0);
941 SDValue Index = Op.getOperand(1);
942
943 if (isa<ConstantSDNode>(Index) ||
944 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
945 return Op;
946
947 Vector = vectorToVerticalVector(DAG, Vector);
948 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
949 Vector, Index);
950}
951
952SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
953 SelectionDAG &DAG) const {
954 SDLoc DL(Op);
955 SDValue Vector = Op.getOperand(0);
956 SDValue Value = Op.getOperand(1);
957 SDValue Index = Op.getOperand(2);
958
959 if (isa<ConstantSDNode>(Index) ||
960 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
961 return Op;
962
963 Vector = vectorToVerticalVector(DAG, Vector);
964 SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
965 Vector, Value, Index);
966 return vectorToVerticalVector(DAG, Insert);
967}
968
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000969SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
970 // On hw >= R700, COS/SIN input must be between -1. and 1.
971 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
972 EVT VT = Op.getValueType();
973 SDValue Arg = Op.getOperand(0);
974 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
975 DAG.getNode(ISD::FADD, SDLoc(Op), VT,
976 DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
977 DAG.getConstantFP(0.15915494309, MVT::f32)),
978 DAG.getConstantFP(0.5, MVT::f32)));
979 unsigned TrigNode;
980 switch (Op.getOpcode()) {
981 case ISD::FCOS:
982 TrigNode = AMDGPUISD::COS_HW;
983 break;
984 case ISD::FSIN:
985 TrigNode = AMDGPUISD::SIN_HW;
986 break;
987 default:
988 llvm_unreachable("Wrong trig opcode");
989 }
990 SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
991 DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
992 DAG.getConstantFP(-0.5, MVT::f32)));
993 if (Gen >= AMDGPUSubtarget::R700)
994 return TrigVal;
995 // On R600 hw, COS/SIN input must be between -Pi and Pi.
996 return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
997 DAG.getConstantFP(3.14159265359, MVT::f32));
998}
999
Jan Vesely25f36272014-06-18 12:27:13 +00001000SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
1001 SDLoc DL(Op);
1002 EVT VT = Op.getValueType();
1003
1004 SDValue Lo = Op.getOperand(0);
1005 SDValue Hi = Op.getOperand(1);
1006 SDValue Shift = Op.getOperand(2);
1007 SDValue Zero = DAG.getConstant(0, VT);
1008 SDValue One = DAG.getConstant(1, VT);
1009
1010 SDValue Width = DAG.getConstant(VT.getSizeInBits(), VT);
1011 SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT);
1012 SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
1013 SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
1014
1015 // The dance around Width1 is necessary for 0 special case.
1016 // Without it the CompShift might be 32, producing incorrect results in
1017 // Overflow. So we do the shift in two steps, the alternative is to
1018 // add a conditional to filter the special case.
1019
1020 SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
1021 Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);
1022
1023 SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
1024 HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
1025 SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);
1026
1027 SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
1028 SDValue LoBig = Zero;
1029
1030 Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
1031 Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
1032
1033 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
1034}
1035
/// Lower SRL_PARTS / SRA_PARTS: a double-width right shift (logical or
/// arithmetic, selected by the opcode) expressed on two part-width values
/// (Lo, Hi) with a variable shift amount.
SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, VT);
  SDValue One = DAG.getConstant(1, VT);

  // Arithmetic (sign-propagating) variant vs logical one.
  const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;

  // BigShift is the excess amount used when Shift >= Width; CompShift is
  // the complementary amount used to compute the bits carried from Hi
  // down into Lo.
  SDValue Width = DAG.getConstant(VT.getSizeInBits(), VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT);
  SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps, the alternative is to
  // add a conditional to filter the special case.

  SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
  Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);

  // Results for Shift < Width: Hi uses SRA/SRL per opcode; Lo is the
  // logical shift of Lo combined with the bits carried down from Hi.
  SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
  SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
  LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);

  // Results for Shift >= Width: Lo is Hi shifted by the excess; Hi is
  // all sign bits (SRA) or zero (SRL).
  SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
  SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;

  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
}
1073
Tom Stellard75aadc22012-12-11 21:25:42 +00001074SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
1075 return DAG.getNode(
1076 ISD::SETCC,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001077 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +00001078 MVT::i1,
1079 Op, DAG.getConstantFP(0.0f, MVT::f32),
1080 DAG.getCondCode(ISD::SETNE)
1081 );
1082}
1083
Tom Stellard75aadc22012-12-11 21:25:42 +00001084SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001085 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001086 unsigned DwordOffset) const {
1087 unsigned ByteOffset = DwordOffset * 4;
1088 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +00001089 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +00001090
1091 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
1092 assert(isInt<16>(ByteOffset));
1093
1094 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
1095 DAG.getConstant(ByteOffset, MVT::i32), // PTR
1096 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
1097 false, false, false, 0);
1098}
1099
Tom Stellard75aadc22012-12-11 21:25:42 +00001100bool R600TargetLowering::isZero(SDValue Op) const {
1101 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
1102 return Cst->isNullValue();
1103 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
1104 return CstFP->isZero();
1105 } else {
1106 return false;
1107 }
1108}
1109
1110SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001111 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001112 EVT VT = Op.getValueType();
1113
1114 SDValue LHS = Op.getOperand(0);
1115 SDValue RHS = Op.getOperand(1);
1116 SDValue True = Op.getOperand(2);
1117 SDValue False = Op.getOperand(3);
1118 SDValue CC = Op.getOperand(4);
1119 SDValue Temp;
1120
Matt Arsenault1e3a4eb2014-12-12 02:30:37 +00001121 if (VT == MVT::f32) {
1122 DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
1123 SDValue MinMax = CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
1124 if (MinMax)
1125 return MinMax;
1126 }
1127
Tom Stellard75aadc22012-12-11 21:25:42 +00001128 // LHS and RHS are guaranteed to be the same value type
1129 EVT CompareVT = LHS.getValueType();
1130
1131 // Check if we can lower this to a native operation.
1132
Tom Stellard2add82d2013-03-08 15:37:09 +00001133 // Try to lower to a SET* instruction:
1134 //
1135 // SET* can match the following patterns:
1136 //
Tom Stellardcd428182013-09-28 02:50:38 +00001137 // select_cc f32, f32, -1, 0, cc_supported
1138 // select_cc f32, f32, 1.0f, 0.0f, cc_supported
1139 // select_cc i32, i32, -1, 0, cc_supported
Tom Stellard2add82d2013-03-08 15:37:09 +00001140 //
1141
1142 // Move hardware True/False values to the correct operand.
Tom Stellardcd428182013-09-28 02:50:38 +00001143 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
1144 ISD::CondCode InverseCC =
1145 ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
Tom Stellard5694d302013-09-28 02:50:43 +00001146 if (isHWTrueValue(False) && isHWFalseValue(True)) {
1147 if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
1148 std::swap(False, True);
1149 CC = DAG.getCondCode(InverseCC);
1150 } else {
1151 ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
1152 if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
1153 std::swap(False, True);
1154 std::swap(LHS, RHS);
1155 CC = DAG.getCondCode(SwapInvCC);
1156 }
1157 }
Tom Stellard2add82d2013-03-08 15:37:09 +00001158 }
1159
1160 if (isHWTrueValue(True) && isHWFalseValue(False) &&
1161 (CompareVT == VT || VT == MVT::i32)) {
1162 // This can be matched by a SET* instruction.
1163 return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
1164 }
1165
Tom Stellard75aadc22012-12-11 21:25:42 +00001166 // Try to lower to a CND* instruction:
Tom Stellard2add82d2013-03-08 15:37:09 +00001167 //
1168 // CND* can match the following patterns:
1169 //
Tom Stellardcd428182013-09-28 02:50:38 +00001170 // select_cc f32, 0.0, f32, f32, cc_supported
1171 // select_cc f32, 0.0, i32, i32, cc_supported
1172 // select_cc i32, 0, f32, f32, cc_supported
1173 // select_cc i32, 0, i32, i32, cc_supported
Tom Stellard2add82d2013-03-08 15:37:09 +00001174 //
Tom Stellardcd428182013-09-28 02:50:38 +00001175
1176 // Try to move the zero value to the RHS
1177 if (isZero(LHS)) {
1178 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
1179 // Try swapping the operands
1180 ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
1181 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
1182 std::swap(LHS, RHS);
1183 CC = DAG.getCondCode(CCSwapped);
1184 } else {
1185 // Try inverting the conditon and then swapping the operands
1186 ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
1187 CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
1188 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
1189 std::swap(True, False);
1190 std::swap(LHS, RHS);
1191 CC = DAG.getCondCode(CCSwapped);
1192 }
1193 }
1194 }
1195 if (isZero(RHS)) {
1196 SDValue Cond = LHS;
1197 SDValue Zero = RHS;
Tom Stellard75aadc22012-12-11 21:25:42 +00001198 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
1199 if (CompareVT != VT) {
1200 // Bitcast True / False to the correct types. This will end up being
1201 // a nop, but it allows us to define only a single pattern in the
1202 // .TD files for each CND* instruction rather than having to have
1203 // one pattern for integer True/False and one for fp True/False
1204 True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
1205 False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
1206 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001207
1208 switch (CCOpcode) {
1209 case ISD::SETONE:
1210 case ISD::SETUNE:
1211 case ISD::SETNE:
Tom Stellard75aadc22012-12-11 21:25:42 +00001212 CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
1213 Temp = True;
1214 True = False;
1215 False = Temp;
1216 break;
1217 default:
1218 break;
1219 }
1220 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
1221 Cond, Zero,
1222 True, False,
1223 DAG.getCondCode(CCOpcode));
1224 return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
1225 }
1226
Tom Stellard75aadc22012-12-11 21:25:42 +00001227 // If we make it this for it means we have no native instructions to handle
1228 // this SELECT_CC, so we must lower it.
1229 SDValue HWTrue, HWFalse;
1230
1231 if (CompareVT == MVT::f32) {
1232 HWTrue = DAG.getConstantFP(1.0f, CompareVT);
1233 HWFalse = DAG.getConstantFP(0.0f, CompareVT);
1234 } else if (CompareVT == MVT::i32) {
1235 HWTrue = DAG.getConstant(-1, CompareVT);
1236 HWFalse = DAG.getConstant(0, CompareVT);
1237 }
1238 else {
Matt Arsenaulteaa3a7e2013-12-10 21:37:42 +00001239 llvm_unreachable("Unhandled value type in LowerSELECT_CC");
Tom Stellard75aadc22012-12-11 21:25:42 +00001240 }
1241
1242 // Lower this unsupported SELECT_CC into a combination of two supported
1243 // SELECT_CC operations.
1244 SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
1245
1246 return DAG.getNode(ISD::SELECT_CC, DL, VT,
1247 Cond, HWFalse,
1248 True, False,
1249 DAG.getCondCode(ISD::SETNE));
1250}
1251
Alp Tokercb402912014-01-24 17:20:08 +00001252/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001253/// convert these pointers to a register index. Each register holds
1254/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
1255/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
1256/// for indirect addressing.
1257SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1258 unsigned StackWidth,
1259 SelectionDAG &DAG) const {
1260 unsigned SRLPad;
1261 switch(StackWidth) {
1262 case 1:
1263 SRLPad = 2;
1264 break;
1265 case 2:
1266 SRLPad = 3;
1267 break;
1268 case 4:
1269 SRLPad = 4;
1270 break;
1271 default: llvm_unreachable("Invalid stack width");
1272 }
1273
Andrew Trickef9de2a2013-05-25 02:42:55 +00001274 return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001275 DAG.getConstant(SRLPad, MVT::i32));
1276}
1277
/// Compute where element \p ElemIdx of a stacked value lives for the given
/// \p StackWidth: \p Channel receives the sub-register channel, and
/// \p PtrIncr the register-index increment RELATIVE TO THE PREVIOUS element
/// (callers such as LowerSTORE walk the elements in order and accumulate
/// PtrIncr into the pointer on each step).
void R600TargetLowering::getStackAddress(unsigned StackWidth,
                                         unsigned ElemIdx,
                                         unsigned &Channel,
                                         unsigned &PtrIncr) const {
  switch (StackWidth) {
  default:
  case 1:
    // One channel per register: every element after the first advances to
    // the next register.
    Channel = 0;
    if (ElemIdx > 0) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 2:
    // Two channels per register: the register advances once, when crossing
    // from element 1 to element 2.
    Channel = ElemIdx % 2;
    if (ElemIdx == 2) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 4:
    // Four channels per register: all elements fit in a single register.
    Channel = ElemIdx;
    PtrIncr = 0;
    break;
  }
}
1306
/// Custom store lowering.  Global-address-space truncating stores become
/// masked-OR dword stores; other global stores get dword-addressed pointers;
/// private-address-space stores are lowered to REGISTER_STORE nodes for
/// indirect addressing.  Everything else is left to the common code.
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Ptr = Op.getOperand(2);

  // Give the generic AMDGPU lowering the first shot.
  SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
  if (Result.getNode()) {
    return Result;
  }

  if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
    if (StoreNode->isTruncatingStore()) {
      // i8/i16 global stores: emit a STORE_MSKOR that read-modify-writes the
      // containing dword, shifting the truncated value and a mask to the
      // correct byte position.
      EVT VT = Value.getValueType();
      assert(VT.bitsLE(MVT::i32));
      EVT MemVT = StoreNode->getMemoryVT();
      SDValue MaskConstant;
      if (MemVT == MVT::i8) {
        MaskConstant = DAG.getConstant(0xFF, MVT::i32);
      } else {
        assert(MemVT == MVT::i16);
        MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
      }
      // Split the byte pointer into a dword address and a byte index
      // within that dword.
      SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
                                      DAG.getConstant(2, MVT::i32));
      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
                                      DAG.getConstant(0x00000003, VT));
      SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
      // Bit shift = byte index * 8.
      SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
                                  DAG.getConstant(3, VT));
      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
      // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
      // vector instead.
      SDValue Src[4] = {
        ShiftedValue,
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(0, MVT::i32),
        Mask
      };
      SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src);
      SDValue Args[3] = { Chain, Input, DWordAddr };
      return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
                                     Op->getVTList(), Args, MemVT,
                                     StoreNode->getMemOperand());
    } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
               Value.getValueType().bitsGE(MVT::i32)) {
      // Convert pointer from byte address to dword address.
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
                        DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
                                    Ptr, DAG.getConstant(2, MVT::i32)));

      if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
        llvm_unreachable("Truncated and indexed stores not supported yet");
      } else {
        Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
      }
      return Chain;
    }
  }

  EVT ValueVT = Value.getValueType();

  // Only private-address-space stores are handled below.
  if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
  if (Ret.getNode()) {
    return Ret;
  }
  // Lowering for indirect addressing

  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering *>(
      getTargetMachine().getSubtargetImpl()->getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  // Turn the byte pointer into a register index (see stackPtrToRegIndex).
  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (ValueVT.isVector()) {
    // Store each element with its own REGISTER_STORE, accumulating the
    // per-element pointer increment reported by getStackAddress.
    unsigned NumElemVT = ValueVT.getVectorNumElements();
    EVT ElemVT = ValueVT.getVectorElementType();
    SmallVector<SDValue, 4> Stores(NumElemVT);

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
                                 Value, DAG.getConstant(i, MVT::i32));

      Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
                              Chain, Elem, Ptr,
                              DAG.getTargetConstant(Channel, MVT::i32));
    }
    // Tie the element stores together into a single chain result.
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
  } else {
    // Scalar case: widen i8 values to i32 before the register store.
    if (ValueVT == MVT::i8) {
      Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
    }
    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
                        DAG.getTargetConstant(0, MVT::i32)); // Channel
  }

  return Chain;
}
1419
// Map a constant-buffer address space to the hardware constant block base:
// 512 + kc_bank * 4096, i.e. (512 + (kc_bank << 12)).
// Returns -1 for address spaces that are not constant buffers.
static int
ConstantAddressBlock(unsigned AddressSpace) {
  switch (AddressSpace) {
  case AMDGPUAS::CONSTANT_BUFFER_0:
    return 512;
  case AMDGPUAS::CONSTANT_BUFFER_1:
    return 512 + 4096;
  case AMDGPUAS::CONSTANT_BUFFER_2:
    return 512 + 4096 * 2;
  case AMDGPUAS::CONSTANT_BUFFER_3:
    return 512 + 4096 * 3;
  case AMDGPUAS::CONSTANT_BUFFER_4:
    return 512 + 4096 * 4;
  case AMDGPUAS::CONSTANT_BUFFER_5:
    return 512 + 4096 * 5;
  case AMDGPUAS::CONSTANT_BUFFER_6:
    return 512 + 4096 * 6;
  case AMDGPUAS::CONSTANT_BUFFER_7:
    return 512 + 4096 * 7;
  case AMDGPUAS::CONSTANT_BUFFER_8:
    return 512 + 4096 * 8;
  case AMDGPUAS::CONSTANT_BUFFER_9:
    return 512 + 4096 * 9;
  case AMDGPUAS::CONSTANT_BUFFER_10:
    return 512 + 4096 * 10;
  case AMDGPUAS::CONSTANT_BUFFER_11:
    return 512 + 4096 * 11;
  case AMDGPUAS::CONSTANT_BUFFER_12:
    return 512 + 4096 * 12;
  case AMDGPUAS::CONSTANT_BUFFER_13:
    return 512 + 4096 * 13;
  case AMDGPUAS::CONSTANT_BUFFER_14:
    return 512 + 4096 * 14;
  case AMDGPUAS::CONSTANT_BUFFER_15:
    return 512 + 4096 * 15;
  default:
    return -1;
  }
}
1460
/// Custom lowering for ISD::LOAD on R600.
///
/// Handles, in this order:
///   1. anything the generic AMDGPU lowering already expands,
///   2. constant-address-space loads of global variables (turned into
///      REGISTER_LOADs through the indirect register file),
///   3. vector loads from local memory (scalarized),
///   4. constant-buffer loads (folded to CONST_ADDRESS when the address is a
///      compile-time constant, a dynamically indexed fetch otherwise),
///   5. SEXT loads, which must be expanded by hand (see comment below),
///   6. private-address-space loads via indirect addressing.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  // Give the target-independent AMDGPU lowering the first chance.
  SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
  if (Ret.getNode()) {
    SDValue Ops[2] = {
      Ret,
      Chain
    };
    return DAG.getMergeValues(Ops, DL);
  }

  // Lower loads constant address space global variable loads
  if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
      isa<GlobalVariable>(
          GetUnderlyingObject(LoadNode->getMemOperand()->getValue()))) {

    // NOTE: this Ptr intentionally shadows the outer one. The byte address
    // is narrowed to the private-address pointer width and converted to a
    // dword index (>> 2) as expected by REGISTER_LOAD.
    SDValue Ptr = DAG.getZExtOrTrunc(LoadNode->getBasePtr(), DL,
        getPointerTy(AMDGPUAS::PRIVATE_ADDRESS));
    Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
        DAG.getConstant(2, MVT::i32));
    return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op->getVTList(),
                       LoadNode->getChain(), Ptr,
                       DAG.getTargetConstant(0, MVT::i32), Op.getOperand(2));
  }

  // Vector loads from local memory are not supported directly; split them
  // into per-element loads.
  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      ScalarizeVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Constant-buffer loads. Only non-extending and zero-extending loads can
  // use the constant cache directly (the hardware zero-fills).
  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
        isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      // Scalar loads still build a (partial) vector; the scalar result is
      // extracted from lane 0 below.
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT,
                           makeArrayRef(Slots, NumElements));
    } else {
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
          DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    SDValue ShiftAmount =
        DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                  LoadNode->getPointerInfo(), MemVT,
                                  LoadNode->isVolatile(),
                                  LoadNode->isNonTemporal(),
                                  LoadNode->isInvariant(),
                                  LoadNode->getAlignment());
    // Manual sign extension: shift the loaded value all the way left, then
    // arithmetic-shift it back.
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);

    SDValue MergedValues[2] = { Sra, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Everything past this point is indirect addressing of the private
  // (per-thread stack) address space.
  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering *>(
      getTargetMachine().getSubtargetImpl()->getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, MVT::i32),
                             Op.getOperand(2));
    }
    // Pad unused channels with undef so the result is always a 4-vector.
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2] = {
    LoweredLoad,
    Chain
  };

  return DAG.getMergeValues(Ops, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001623
Matt Arsenault1d555c42014-06-23 18:00:55 +00001624SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1625 SDValue Chain = Op.getOperand(0);
1626 SDValue Cond = Op.getOperand(1);
1627 SDValue Jump = Op.getOperand(2);
1628
1629 return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1630 Chain, Jump, Cond);
1631}
1632
Tom Stellard75aadc22012-12-11 21:25:42 +00001633/// XXX Only kernel functions are supported, so we can assume for now that
1634/// every function is a kernel function, but in the future we should use
1635/// separate calling conventions for kernel and non-kernel functions.
1636SDValue R600TargetLowering::LowerFormalArguments(
1637 SDValue Chain,
1638 CallingConv::ID CallConv,
1639 bool isVarArg,
1640 const SmallVectorImpl<ISD::InputArg> &Ins,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001641 SDLoc DL, SelectionDAG &DAG,
Tom Stellard75aadc22012-12-11 21:25:42 +00001642 SmallVectorImpl<SDValue> &InVals) const {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001643 SmallVector<CCValAssign, 16> ArgLocs;
Eric Christopherb5217502014-08-06 18:45:26 +00001644 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1645 *DAG.getContext());
Vincent Lejeunef143af32013-11-11 22:10:24 +00001646 MachineFunction &MF = DAG.getMachineFunction();
Jan Veselye5121f32014-10-14 20:05:26 +00001647 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellardacfeebf2013-07-23 01:48:05 +00001648
Tom Stellardaf775432013-10-23 00:44:32 +00001649 SmallVector<ISD::InputArg, 8> LocalIns;
1650
Matt Arsenault209a7b92014-04-18 07:40:20 +00001651 getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);
Tom Stellardaf775432013-10-23 00:44:32 +00001652
1653 AnalyzeFormalArguments(CCInfo, LocalIns);
Tom Stellardacfeebf2013-07-23 01:48:05 +00001654
Tom Stellard1e803092013-07-23 01:48:18 +00001655 for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001656 CCValAssign &VA = ArgLocs[i];
Matt Arsenault74ef2772014-08-13 18:14:11 +00001657 const ISD::InputArg &In = Ins[i];
1658 EVT VT = In.VT;
1659 EVT MemVT = VA.getLocVT();
1660 if (!VT.isVector() && MemVT.isVector()) {
1661 // Get load source type if scalarized.
1662 MemVT = MemVT.getVectorElementType();
1663 }
Tom Stellard78e01292013-07-23 01:47:58 +00001664
Jan Veselye5121f32014-10-14 20:05:26 +00001665 if (MFI->getShaderType() != ShaderType::COMPUTE) {
Vincent Lejeunef143af32013-11-11 22:10:24 +00001666 unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
1667 SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
1668 InVals.push_back(Register);
1669 continue;
1670 }
1671
Tom Stellard75aadc22012-12-11 21:25:42 +00001672 PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Matt Arsenault74ef2772014-08-13 18:14:11 +00001673 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellardacfeebf2013-07-23 01:48:05 +00001674
Matt Arsenaultfae02982014-03-17 18:58:11 +00001675 // i64 isn't a legal type, so the register type used ends up as i32, which
1676 // isn't expected here. It attempts to create this sextload, but it ends up
1677 // being invalid. Somehow this seems to work with i64 arguments, but breaks
1678 // for <1 x i64>.
1679
Tom Stellardacfeebf2013-07-23 01:48:05 +00001680 // The first 36 bytes of the input buffer contains information about
1681 // thread group and global sizes.
Matt Arsenault74ef2772014-08-13 18:14:11 +00001682 ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
1683 if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
1684 // FIXME: This should really check the extload type, but the handling of
1685 // extload vector parameters seems to be broken.
Matt Arsenaulte1f030c2014-04-11 20:59:54 +00001686
Matt Arsenault74ef2772014-08-13 18:14:11 +00001687 // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
1688 Ext = ISD::SEXTLOAD;
1689 }
1690
1691 // Compute the offset from the value.
1692 // XXX - I think PartOffset should give you this, but it seems to give the
1693 // size of the register which isn't useful.
1694
1695 unsigned ValBase = ArgLocs[In.OrigArgIndex].getLocMemOffset();
1696 unsigned PartOffset = VA.getLocMemOffset();
Jan Veselye5121f32014-10-14 20:05:26 +00001697 unsigned Offset = 36 + VA.getLocMemOffset();
Matt Arsenault74ef2772014-08-13 18:14:11 +00001698
1699 MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
1700 SDValue Arg = DAG.getLoad(ISD::UNINDEXED, Ext, VT, DL, Chain,
Jan Veselye5121f32014-10-14 20:05:26 +00001701 DAG.getConstant(Offset, MVT::i32),
Matt Arsenault74ef2772014-08-13 18:14:11 +00001702 DAG.getUNDEF(MVT::i32),
1703 PtrInfo,
1704 MemVT, false, true, true, 4);
Matt Arsenault209a7b92014-04-18 07:40:20 +00001705
1706 // 4 is the preferred alignment for the CONSTANT memory space.
Tom Stellard75aadc22012-12-11 21:25:42 +00001707 InVals.push_back(Arg);
Jan Veselye5121f32014-10-14 20:05:26 +00001708 MFI->ABIArgOffset = Offset + MemVT.getStoreSize();
Tom Stellard75aadc22012-12-11 21:25:42 +00001709 }
1710 return Chain;
1711}
1712
Matt Arsenault758659232013-05-18 00:21:46 +00001713EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001714 if (!VT.isVector())
1715 return MVT::i32;
Tom Stellard75aadc22012-12-11 21:25:42 +00001716 return VT.changeVectorElementTypeToInteger();
1717}
1718
Matt Arsenault209a7b92014-04-18 07:40:20 +00001719static SDValue CompactSwizzlableVector(
1720 SelectionDAG &DAG, SDValue VectorEntry,
1721 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001722 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1723 assert(RemapSwizzle.empty());
1724 SDValue NewBldVec[4] = {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001725 VectorEntry.getOperand(0),
1726 VectorEntry.getOperand(1),
1727 VectorEntry.getOperand(2),
1728 VectorEntry.getOperand(3)
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001729 };
1730
1731 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeunefa58a5f2013-10-13 17:56:10 +00001732 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1733 // We mask write here to teach later passes that the ith element of this
1734 // vector is undef. Thus we can use it to reduce 128 bits reg usage,
1735 // break false dependencies and additionnaly make assembly easier to read.
1736 RemapSwizzle[i] = 7; // SEL_MASK_WRITE
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001737 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1738 if (C->isZero()) {
1739 RemapSwizzle[i] = 4; // SEL_0
1740 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1741 } else if (C->isExactlyValue(1.0)) {
1742 RemapSwizzle[i] = 5; // SEL_1
1743 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1744 }
1745 }
1746
1747 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1748 continue;
1749 for (unsigned j = 0; j < i; j++) {
1750 if (NewBldVec[i] == NewBldVec[j]) {
1751 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1752 RemapSwizzle[i] = j;
1753 break;
1754 }
1755 }
1756 }
1757
1758 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001759 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001760}
1761
Benjamin Kramer193960c2013-06-11 13:32:25 +00001762static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1763 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001764 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1765 assert(RemapSwizzle.empty());
1766 SDValue NewBldVec[4] = {
1767 VectorEntry.getOperand(0),
1768 VectorEntry.getOperand(1),
1769 VectorEntry.getOperand(2),
1770 VectorEntry.getOperand(3)
1771 };
1772 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001773 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001774 RemapSwizzle[i] = i;
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001775 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1776 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1777 ->getZExtValue();
1778 if (i == Idx)
1779 isUnmovable[Idx] = true;
1780 }
1781 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001782
1783 for (unsigned i = 0; i < 4; i++) {
1784 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1785 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1786 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001787 if (isUnmovable[Idx])
1788 continue;
1789 // Swap i and Idx
1790 std::swap(NewBldVec[Idx], NewBldVec[i]);
1791 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1792 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001793 }
1794 }
1795
1796 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001797 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001798}
1799
1800
1801SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1802SDValue Swz[4], SelectionDAG &DAG) const {
1803 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1804 // Old -> New swizzle values
1805 DenseMap<unsigned, unsigned> SwizzleRemap;
1806
1807 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1808 for (unsigned i = 0; i < 4; i++) {
1809 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1810 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1811 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1812 }
1813
1814 SwizzleRemap.clear();
1815 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1816 for (unsigned i = 0; i < 4; i++) {
1817 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1818 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1819 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1820 }
1821
1822 return BuildVector;
1823}
1824
1825
Tom Stellard75aadc22012-12-11 21:25:42 +00001826//===----------------------------------------------------------------------===//
1827// Custom DAG Optimizations
1828//===----------------------------------------------------------------------===//
1829
/// R600-specific DAG combines, run after the generic AMDGPU combines.
SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
  case ISD::FP_ROUND: {
      SDValue Arg = N->getOperand(0);
      if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
        return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
                           Arg.getOperand(0));
      }
      break;
    }

  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
  // (i32 select_cc f32, f32, -1, 0 cc)
  //
  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
  // this to one of the SET*_DX10 instructions.
  case ISD::FP_TO_SINT: {
    SDValue FNeg = N->getOperand(0);
    if (FNeg.getOpcode() != ISD::FNEG) {
      return SDValue();
    }
    SDValue SelectCC = FNeg.getOperand(0);
    if (SelectCC.getOpcode() != ISD::SELECT_CC ||
        SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
        SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
        !isHWTrueValue(SelectCC.getOperand(2)) ||
        !isHWFalseValue(SelectCC.getOperand(3))) {
      return SDValue();
    }

    return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
                           SelectCC.getOperand(0), // LHS
                           SelectCC.getOperand(1), // RHS
                           DAG.getConstant(-1, MVT::i32), // True
                           DAG.getConstant(0, MVT::i32), // False
                           SelectCC.getOperand(4)); // CC

    // NOTE(review): unreachable — the return above always fires.
    break;
  }

  // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
  // => build_vector elt0, ... , NewEltIdx, ... , eltN
  case ISD::INSERT_VECTOR_ELT: {
    SDValue InVec = N->getOperand(0);
    SDValue InVal = N->getOperand(1);
    SDValue EltNo = N->getOperand(2);
    SDLoc dl(N);

    // If the inserted element is an UNDEF, just use the input vector.
    if (InVal.getOpcode() == ISD::UNDEF)
      return InVec;

    EVT VT = InVec.getValueType();

    // If we can't generate a legal BUILD_VECTOR, exit
    if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
      return SDValue();

    // Check that we know which element is being inserted
    if (!isa<ConstantSDNode>(EltNo))
      return SDValue();
    unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

    // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
    // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
    // vector elements.
    SmallVector<SDValue, 8> Ops;
    if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
      Ops.append(InVec.getNode()->op_begin(),
                 InVec.getNode()->op_end());
    } else if (InVec.getOpcode() == ISD::UNDEF) {
      unsigned NElts = VT.getVectorNumElements();
      Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
    } else {
      return SDValue();
    }

    // Insert the element
    if (Elt < Ops.size()) {
      // All the operands of BUILD_VECTOR must have the same type;
      // we enforce that here.
      EVT OpVT = Ops[0].getValueType();
      if (InVal.getValueType() != OpVT)
        InVal = OpVT.bitsGT(InVal.getValueType()) ?
          DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
          DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
      Ops[Elt] = InVal;
    }

    // Return the new vector
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
  }

  // Extract_vec (Build_vector) generated by custom lowering
  // also needs to be customly combined
  case ISD::EXTRACT_VECTOR_ELT: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return Arg->getOperand(Element);
      }
    }
    if (Arg.getOpcode() == ISD::BITCAST &&
        Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
            Arg->getOperand(0).getOperand(Element));
      }
    }
    // NOTE(review): there is no break here, so when neither pattern matches
    // control falls through into the SELECT_CC case below. That case starts
    // by running the generic AMDGPU combine (which the default case would
    // also do) and then bails out on the SELECT_CC-specific checks, so the
    // fallthrough looks benign — but confirm it is intentional.
  }

  case ISD::SELECT_CC: {
    // Try common optimizations
    SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
    if (Ret.getNode())
      return Ret;

    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
    //      selectcc x, y, a, b, inv(cc)
    //
    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
    //      selectcc x, y, a, b, cc
    SDValue LHS = N->getOperand(0);
    if (LHS.getOpcode() != ISD::SELECT_CC) {
      return SDValue();
    }

    SDValue RHS = N->getOperand(1);
    SDValue True = N->getOperand(2);
    SDValue False = N->getOperand(3);
    ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();

    if (LHS.getOperand(2).getNode() != True.getNode() ||
        LHS.getOperand(3).getNode() != False.getNode() ||
        RHS.getNode() != False.getNode()) {
      return SDValue();
    }

    switch (NCC) {
    default: return SDValue();
    case ISD::SETNE: return LHS;
    case ISD::SETEQ: {
      ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
      LHSCC = ISD::getSetCCInverse(LHSCC,
                                  LHS.getOperand(0).getValueType().isInteger());
      // Only fold if the inverted condition is legal (or we are still free
      // to legalize it later).
      if (DCI.isBeforeLegalizeOps() ||
          isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
        return DAG.getSelectCC(SDLoc(N),
                               LHS.getOperand(0),
                               LHS.getOperand(1),
                               LHS.getOperand(2),
                               LHS.getOperand(3),
                               LHSCC);
      break;
    }
    }
    return SDValue();
  }

  case AMDGPUISD::EXPORT: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    SDValue NewArgs[8] = {
      N->getOperand(0), // Chain
      SDValue(),
      N->getOperand(2), // ArrayBase
      N->getOperand(3), // Type
      N->getOperand(4), // SWZ_X
      N->getOperand(5), // SWZ_Y
      N->getOperand(6), // SWZ_Z
      N->getOperand(7) // SWZ_W
    };
    SDLoc DL(N);
    // Canonicalize the exported value and its four swizzle operands.
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
    return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
  }
  case AMDGPUISD::TEXTURE_FETCH: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    SDValue NewArgs[19] = {
      N->getOperand(0),
      N->getOperand(1),
      N->getOperand(2),
      N->getOperand(3),
      N->getOperand(4),
      N->getOperand(5),
      N->getOperand(6),
      N->getOperand(7),
      N->getOperand(8),
      N->getOperand(9),
      N->getOperand(10),
      N->getOperand(11),
      N->getOperand(12),
      N->getOperand(13),
      N->getOperand(14),
      N->getOperand(15),
      N->getOperand(16),
      N->getOperand(17),
      N->getOperand(18),
    };
    // Canonicalize the coordinate vector and its swizzle operands (which
    // start at operand 2 for texture fetches).
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
    return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
        NewArgs);
  }
  }

  return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002049
2050static bool
2051FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002052 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Eric Christopherfc6de422014-08-05 02:39:49 +00002053 const R600InstrInfo *TII =
2054 static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002055 if (!Src.isMachineOpcode())
2056 return false;
2057 switch (Src.getMachineOpcode()) {
2058 case AMDGPU::FNEG_R600:
2059 if (!Neg.getNode())
2060 return false;
2061 Src = Src.getOperand(0);
2062 Neg = DAG.getTargetConstant(1, MVT::i32);
2063 return true;
2064 case AMDGPU::FABS_R600:
2065 if (!Abs.getNode())
2066 return false;
2067 Src = Src.getOperand(0);
2068 Abs = DAG.getTargetConstant(1, MVT::i32);
2069 return true;
2070 case AMDGPU::CONST_COPY: {
2071 unsigned Opcode = ParentNode->getMachineOpcode();
2072 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2073
2074 if (!Sel.getNode())
2075 return false;
2076
2077 SDValue CstOffset = Src.getOperand(0);
2078 if (ParentNode->getValueType(0).isVector())
2079 return false;
2080
2081 // Gather constants values
2082 int SrcIndices[] = {
2083 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2084 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2085 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
2086 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2087 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2088 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2089 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2090 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2091 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2092 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2093 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
2094 };
2095 std::vector<unsigned> Consts;
Matt Arsenault4d64f962014-05-12 19:23:21 +00002096 for (int OtherSrcIdx : SrcIndices) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002097 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2098 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2099 continue;
2100 if (HasDst) {
2101 OtherSrcIdx--;
2102 OtherSelIdx--;
2103 }
2104 if (RegisterSDNode *Reg =
2105 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2106 if (Reg->getReg() == AMDGPU::ALU_CONST) {
Matt Arsenaultb3ee3882014-05-12 19:26:38 +00002107 ConstantSDNode *Cst
2108 = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002109 Consts.push_back(Cst->getZExtValue());
2110 }
2111 }
2112 }
2113
Matt Arsenault37c12d72014-05-12 20:42:57 +00002114 ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002115 Consts.push_back(Cst->getZExtValue());
2116 if (!TII->fitsConstReadLimitations(Consts)) {
2117 return false;
2118 }
2119
2120 Sel = CstOffset;
2121 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
2122 return true;
2123 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002124 case AMDGPU::MOV_IMM_I32:
2125 case AMDGPU::MOV_IMM_F32: {
2126 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
2127 uint64_t ImmValue = 0;
2128
2129
2130 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
2131 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
2132 float FloatValue = FPC->getValueAPF().convertToFloat();
2133 if (FloatValue == 0.0) {
2134 ImmReg = AMDGPU::ZERO;
2135 } else if (FloatValue == 0.5) {
2136 ImmReg = AMDGPU::HALF;
2137 } else if (FloatValue == 1.0) {
2138 ImmReg = AMDGPU::ONE;
2139 } else {
2140 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2141 }
2142 } else {
2143 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
2144 uint64_t Value = C->getZExtValue();
2145 if (Value == 0) {
2146 ImmReg = AMDGPU::ZERO;
2147 } else if (Value == 1) {
2148 ImmReg = AMDGPU::ONE_INT;
2149 } else {
2150 ImmValue = Value;
2151 }
2152 }
2153
2154 // Check that we aren't already using an immediate.
2155 // XXX: It's possible for an instruction to have more than one
2156 // immediate operand, but this is not supported yet.
2157 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
2158 if (!Imm.getNode())
2159 return false;
2160 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
2161 assert(C);
2162 if (C->getZExtValue())
2163 return false;
2164 Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
2165 }
2166 Src = DAG.getRegister(ImmReg, MVT::i32);
2167 return true;
2168 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002169 default:
2170 return false;
2171 }
2172}
2173
2174
/// \brief Fold the instructions after selecting them.
///
/// Runs as the post-instruction-selection hook: for each machine SDNode it
/// tries to fold constant/immediate sources directly into the instruction's
/// operand slots (src, neg, abs, sel, literal) via FoldOperand, and folds the
/// CLAMP_R600 pseudo into the clamp modifier of its defining instruction.
/// Returns a new machine node when a fold succeeded, otherwise returns \p Node
/// unchanged.
SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
                                            SelectionDAG &DAG) const {
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
  if (!Node->isMachineOpcode())
    return Node;
  unsigned Opcode = Node->getMachineOpcode();
  // Placeholder passed to FoldOperand for modifier slots the current
  // instruction does not have; FoldOperand may read/write it harmlessly.
  SDValue FakeOp;

  // Copy the operand list so FoldOperand can mutate entries through the
  // SDValue& references taken below; on success the whole (mutated) vector
  // is used to rebuild the node.
  std::vector<SDValue> Ops;
  for (const SDUse &I : Node->ops())
    Ops.push_back(I);

  if (Opcode == AMDGPU::DOT_4) {
    // DOT_4 has eight scalar sources: src0/src1 crossed with the X/Y/Z/W
    // channels, each with its own neg/abs modifier operand.
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
    };
    for (unsigned i = 0; i < 8; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      // The MI operand indices from TII count the dst operand, which is an
      // SDNode *result* rather than an operand — hence the "- 1" when
      // indexing into Ops (see the matching HasDst/SelIdx-- adjustment).
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue &Abs = Ops[AbsIdx[i] - 1];
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      if (HasDst)
        SelIdx--;
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      // DOT_4 has no literal slot here, so pass FakeOp for Imm.
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::REG_SEQUENCE) {
    // REG_SEQUENCE operands: operand 0 is the register-class id, followed by
    // (value, subreg-index) pairs — so the value operands sit at the odd
    // indices 1, 3, 5, ...
    for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
      SDValue &Src = Ops[i];
      // No neg/abs/sel/literal slots on REG_SEQUENCE; only Src may fold.
      if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::CLAMP_R600) {
    // Fold the CLAMP_R600 pseudo away by setting the clamp modifier on the
    // machine instruction that defines its input, when that instruction
    // supports output modifiers.
    SDValue Src = Node->getOperand(0);
    if (!Src.isMachineOpcode() ||
        !TII->hasInstrModifiers(Src.getMachineOpcode()))
      return Node;
    int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
        AMDGPU::OpName::clamp);
    if (ClampIdx < 0)
      return Node;
    // Deliberately shadows the outer Ops: this operand list belongs to the
    // *defining* instruction (Src), which is rebuilt with clamp = 1.
    std::vector<SDValue> Ops;
    unsigned NumOp = Src.getNumOperands();
    for(unsigned i = 0; i < NumOp; ++i)
      Ops.push_back(Src.getOperand(i));
    // ClampIdx counts the MI dst operand; subtract 1 for the SDNode view.
    Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
    return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
        Node->getVTList(), Ops);
  } else {
    // Generic ALU instruction: up to three sources (src0/src1/src2), each
    // with neg and (src0/src1 only) abs modifiers, plus one shared literal
    // operand.
    if (!TII->hasInstrModifiers(Opcode))
      return Node;
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
    };
    // src2 has no abs modifier; -1 routes it to FakeAbs below.
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
      -1
    };
    for (unsigned i = 0; i < 3; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      // See the DOT_4 branch: "- 1" compensates for the MI dst operand,
      // which is a result on the SDNode and absent from Ops.
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue FakeAbs;
      SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
      if (HasDst) {
        SelIdx--;
        ImmIdx--;
      }
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      // NOTE(review): ImmIdx is indexed unguarded, unlike SelIdx — this
      // presumably relies on every instruction with modifiers having a
      // literal operand; confirm against the R600 instruction definitions.
      SDValue &Imm = Ops[ImmIdx];
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  }

  return Node;
}