blob: 996117c4bd2a9192aa18b7080f994164578c3c0d [file] [log] [blame]
Tom Stellard75aadc22012-12-11 21:25:42 +00001//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Custom DAG lowering for R600
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000016#include "AMDGPUFrameLowering.h"
Matt Arsenaultc791f392014-06-23 18:00:31 +000017#include "AMDGPUIntrinsicInfo.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000018#include "AMDGPUSubtarget.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000019#include "R600Defines.h"
20#include "R600InstrInfo.h"
21#include "R600MachineFunctionInfo.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000022#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000023#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000024#include "llvm/CodeGen/MachineInstrBuilder.h"
25#include "llvm/CodeGen/MachineRegisterInfo.h"
26#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000027#include "llvm/IR/Argument.h"
28#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000029
30using namespace llvm;
31
32R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000033 AMDGPUTargetLowering(TM),
34 Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000035 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
36 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
37 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
38 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000039 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
40 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
41
Tom Stellard75aadc22012-12-11 21:25:42 +000042 computeRegisterProperties();
43
Tom Stellard0351ea22013-09-28 02:50:50 +000044 // Set condition code actions
45 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
46 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000047 setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000048 setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000049 setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
50 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000051 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
52 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
53 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
54 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000055 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
56 setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
57
58 setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
59 setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
60 setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
61 setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
62
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000063 setOperationAction(ISD::FCOS, MVT::f32, Custom);
64 setOperationAction(ISD::FSIN, MVT::f32, Custom);
65
Tom Stellard75aadc22012-12-11 21:25:42 +000066 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000067 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000068
Tom Stellard492ebea2013-03-08 15:37:07 +000069 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
70 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Matt Arsenault1d555c42014-06-23 18:00:55 +000071 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000072
73 setOperationAction(ISD::FSUB, MVT::f32, Expand);
74
75 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
76 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
77 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000078
Tom Stellard75aadc22012-12-11 21:25:42 +000079 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
80 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
81
Tom Stellarde8f9f282013-03-08 15:37:05 +000082 setOperationAction(ISD::SETCC, MVT::i32, Expand);
83 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000084 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
85
Tom Stellard53f2f902013-09-05 18:38:03 +000086 setOperationAction(ISD::SELECT, MVT::i32, Expand);
87 setOperationAction(ISD::SELECT, MVT::f32, Expand);
88 setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
Tom Stellard53f2f902013-09-05 18:38:03 +000089 setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000090
Matt Arsenault4e466652014-04-16 01:41:30 +000091 // Expand sign extension of vectors
92 if (!Subtarget->hasBFE())
93 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
94
95 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
96 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);
97
98 if (!Subtarget->hasBFE())
99 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
100 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
101 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);
102
103 if (!Subtarget->hasBFE())
104 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
105 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
106 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);
107
108 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
109 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
110 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);
111
112 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
113
114
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000115 // Legalize loads and stores to the private address space.
116 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000117 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000118 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000119
120 // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
121 // spaces, so it is custom lowered to handle those where it isn't.
Tom Stellard1e803092013-07-23 01:48:18 +0000122 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
123 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
124 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
125 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000126 setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
127 setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
128
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000129 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000130 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000131 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000132 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
Tom Stellardd3ee8c12013-08-16 01:12:06 +0000133 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
134 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000135
Tom Stellard365366f2013-01-23 02:09:06 +0000136 setOperationAction(ISD::LOAD, MVT::i32, Custom);
137 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000138 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
139
Tom Stellard880a80a2014-06-17 16:53:14 +0000140 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
141 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
142 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
143 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
144
145 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
146 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
147 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
148 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
149
Tom Stellard75aadc22012-12-11 21:25:42 +0000150 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +0000151 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +0000152 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +0000153 setTargetDAGCombine(ISD::SELECT_CC);
Quentin Colombete2e05482013-07-30 00:27:16 +0000154 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000155
Matt Arsenaultb8b51532014-06-23 18:00:38 +0000156 setOperationAction(ISD::SUB, MVT::i64, Expand);
157
Tom Stellard5f337882014-04-29 23:12:43 +0000158 // These should be replaced by UDVIREM, but it does not happen automatically
159 // during Type Legalization
160 setOperationAction(ISD::UDIV, MVT::i64, Custom);
161 setOperationAction(ISD::UREM, MVT::i64, Custom);
Jan Vesely343cd6f02014-06-22 21:43:01 +0000162 setOperationAction(ISD::SDIV, MVT::i64, Custom);
163 setOperationAction(ISD::SREM, MVT::i64, Custom);
Tom Stellard5f337882014-04-29 23:12:43 +0000164
Jan Vesely25f36272014-06-18 12:27:13 +0000165 // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
166 // to be Legal/Custom in order to avoid library calls.
167 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
Jan Vesely900ff2e2014-06-18 12:27:15 +0000168 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
Jan Veselyecf51332014-06-18 12:27:17 +0000169 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
Jan Vesely25f36272014-06-18 12:27:13 +0000170
Michel Danzer49812b52013-07-10 16:37:07 +0000171 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
172
Matt Arsenaultc4d3d3a2014-06-23 18:00:49 +0000173 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
174 for (MVT VT : ScalarIntVTs) {
175 setOperationAction(ISD::ADDC, VT, Expand);
176 setOperationAction(ISD::SUBC, VT, Expand);
177 setOperationAction(ISD::ADDE, VT, Expand);
178 setOperationAction(ISD::SUBE, VT, Expand);
179 }
180
Tom Stellardb852af52013-03-08 15:37:03 +0000181 setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard87047f62013-04-24 23:56:18 +0000182 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
Tom Stellardfc455472013-08-12 22:33:21 +0000183 setSchedulingPreference(Sched::Source);
Tom Stellard75aadc22012-12-11 21:25:42 +0000184}
185
/// Expand pseudo instructions that were marked usesCustomInserter.
///
/// Lowers R600 pseudos (flag-carrying MOV wrappers, immediate moves, texture
/// gradient sampling, branches, exports, RAT writes, LDS returns) into real
/// machine instructions at the pseudo's position, then erases the pseudo.
/// Returns the (possibly unchanged) basic block containing the expansion.
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
          static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // Replace LDS_*_RET instructions that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI->getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // If the destination is actually used, keep the _RET form unchanged.
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()))
        return BB;

      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      // Copy every operand except the (dead) destination at index 0.
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      // Not an R600-specific pseudo; defer to the common AMDGPU handling.
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  // CLAMP/FABS/FNEG are modifier bits on R600, so each pseudo becomes a MOV
  // carrying the corresponding instruction flag.
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    // MASK_WRITE marks the instruction defining its operand as write-masked
    // instead of emitting any code itself.
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    // Materialize the float constant via its raw bit pattern.
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    // Read from the constant buffer: a MOV from ALU_CONST with the constant
    // index encoded in src0_sel.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
                       MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    // If this write is immediately followed by the function's RETURN, fold
    // the end-of-program bit into the write.
    unsigned EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    // Texture sample with user-supplied derivatives: load the horizontal and
    // vertical gradients into two temporaries, then issue the gradient
    // sample that implicitly reads them.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    // Source swizzle (SrcX..SrcW) and coordinate-type bits (CTX..CTW),
    // adjusted below per texture target.
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            // Implicit uses keep the gradient-setup instructions alive and
            // ordered before the sample.
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    // Same expansion as TXD, but using the shadow-compare gradient sample
    // (TEX_SAMPLE_C_G).
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    // Unconditional branch lowers directly to JUMP.
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    // Set the predicate bit from a float "is not zero" test, then emit a
    // predicated jump that kills the predicate bit.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    // Same as the f32 variant but with the integer compare opcode.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO_INT)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if it's not the last one of its type.
    // Only the final export of each type (and the one preceding RETURN)
    // needs the CF instruction / end-of-program information filled in.
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
                                             .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // CF instruction encoding differs between Evergreen (84) and R600 (40).
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  // The pseudo has been expanded (or annotated); remove it.
  MI->eraseFromParent();
  return BB;
}
562
563//===----------------------------------------------------------------------===//
564// Custom DAG Lowering Operations
565//===----------------------------------------------------------------------===//
566
Tom Stellard75aadc22012-12-11 21:25:42 +0000567SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000568 MachineFunction &MF = DAG.getMachineFunction();
569 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard75aadc22012-12-11 21:25:42 +0000570 switch (Op.getOpcode()) {
571 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Tom Stellard880a80a2014-06-17 16:53:14 +0000572 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
573 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
Jan Vesely25f36272014-06-18 12:27:13 +0000574 case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
Jan Veselyecf51332014-06-18 12:27:17 +0000575 case ISD::SRA_PARTS:
Jan Vesely900ff2e2014-06-18 12:27:15 +0000576 case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000577 case ISD::FCOS:
578 case ISD::FSIN: return LowerTrig(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000579 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000580 case ISD::STORE: return LowerSTORE(Op, DAG);
Tom Stellard365366f2013-01-23 02:09:06 +0000581 case ISD::LOAD: return LowerLOAD(Op, DAG);
Matt Arsenault1d555c42014-06-23 18:00:55 +0000582 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000583 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000584 case ISD::INTRINSIC_VOID: {
585 SDValue Chain = Op.getOperand(0);
586 unsigned IntrinsicID =
587 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
588 switch (IntrinsicID) {
589 case AMDGPUIntrinsic::AMDGPU_store_output: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000590 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
591 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000592 MFI->LiveOuts.push_back(Reg);
Andrew Trickef9de2a2013-05-25 02:42:55 +0000593 return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
Tom Stellard75aadc22012-12-11 21:25:42 +0000594 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000595 case AMDGPUIntrinsic::R600_store_swizzle: {
596 const SDValue Args[8] = {
597 Chain,
598 Op.getOperand(2), // Export Value
599 Op.getOperand(3), // ArrayBase
600 Op.getOperand(4), // Type
601 DAG.getConstant(0, MVT::i32), // SWZ_X
602 DAG.getConstant(1, MVT::i32), // SWZ_Y
603 DAG.getConstant(2, MVT::i32), // SWZ_Z
604 DAG.getConstant(3, MVT::i32) // SWZ_W
605 };
Craig Topper48d114b2014-04-26 18:35:24 +0000606 return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(), Args);
Tom Stellard75aadc22012-12-11 21:25:42 +0000607 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000608
Tom Stellard75aadc22012-12-11 21:25:42 +0000609 // default for switch(IntrinsicID)
610 default: break;
611 }
612 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
613 break;
614 }
615 case ISD::INTRINSIC_WO_CHAIN: {
616 unsigned IntrinsicID =
617 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
618 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000619 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000620 switch(IntrinsicID) {
621 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000622 case AMDGPUIntrinsic::R600_load_input: {
623 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
624 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
625 MachineFunction &MF = DAG.getMachineFunction();
626 MachineRegisterInfo &MRI = MF.getRegInfo();
627 MRI.addLiveIn(Reg);
628 return DAG.getCopyFromReg(DAG.getEntryNode(),
629 SDLoc(DAG.getEntryNode()), Reg, VT);
630 }
631
632 case AMDGPUIntrinsic::R600_interp_input: {
633 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
634 int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
635 MachineSDNode *interp;
636 if (ijb < 0) {
637 const MachineFunction &MF = DAG.getMachineFunction();
638 const R600InstrInfo *TII =
639 static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
640 interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
641 MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
642 return DAG.getTargetExtractSubreg(
643 TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
644 DL, MVT::f32, SDValue(interp, 0));
645 }
646 MachineFunction &MF = DAG.getMachineFunction();
647 MachineRegisterInfo &MRI = MF.getRegInfo();
648 unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
649 unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
650 MRI.addLiveIn(RegisterI);
651 MRI.addLiveIn(RegisterJ);
652 SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
653 SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
654 SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
655 SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);
656
657 if (slot % 4 < 2)
658 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
659 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
660 RegisterJNode, RegisterINode);
661 else
662 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
663 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
664 RegisterJNode, RegisterINode);
665 return SDValue(interp, slot % 2);
666 }
Vincent Lejeunef143af32013-11-11 22:10:24 +0000667 case AMDGPUIntrinsic::R600_interp_xy:
668 case AMDGPUIntrinsic::R600_interp_zw: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000669 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Tom Stellard41afe6a2013-02-05 17:09:14 +0000670 MachineSDNode *interp;
Vincent Lejeunef143af32013-11-11 22:10:24 +0000671 SDValue RegisterINode = Op.getOperand(2);
672 SDValue RegisterJNode = Op.getOperand(3);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000673
Vincent Lejeunef143af32013-11-11 22:10:24 +0000674 if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
Tom Stellard41afe6a2013-02-05 17:09:14 +0000675 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
Vincent Lejeunef143af32013-11-11 22:10:24 +0000676 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000677 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000678 else
679 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
Vincent Lejeunef143af32013-11-11 22:10:24 +0000680 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000681 RegisterJNode, RegisterINode);
Vincent Lejeunef143af32013-11-11 22:10:24 +0000682 return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
683 SDValue(interp, 0), SDValue(interp, 1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000684 }
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000685 case AMDGPUIntrinsic::R600_tex:
686 case AMDGPUIntrinsic::R600_texc:
687 case AMDGPUIntrinsic::R600_txl:
688 case AMDGPUIntrinsic::R600_txlc:
689 case AMDGPUIntrinsic::R600_txb:
690 case AMDGPUIntrinsic::R600_txbc:
691 case AMDGPUIntrinsic::R600_txf:
692 case AMDGPUIntrinsic::R600_txq:
693 case AMDGPUIntrinsic::R600_ddx:
Vincent Lejeune6df39432013-10-02 16:00:33 +0000694 case AMDGPUIntrinsic::R600_ddy:
695 case AMDGPUIntrinsic::R600_ldptr: {
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000696 unsigned TextureOp;
697 switch (IntrinsicID) {
698 case AMDGPUIntrinsic::R600_tex:
699 TextureOp = 0;
700 break;
701 case AMDGPUIntrinsic::R600_texc:
702 TextureOp = 1;
703 break;
704 case AMDGPUIntrinsic::R600_txl:
705 TextureOp = 2;
706 break;
707 case AMDGPUIntrinsic::R600_txlc:
708 TextureOp = 3;
709 break;
710 case AMDGPUIntrinsic::R600_txb:
711 TextureOp = 4;
712 break;
713 case AMDGPUIntrinsic::R600_txbc:
714 TextureOp = 5;
715 break;
716 case AMDGPUIntrinsic::R600_txf:
717 TextureOp = 6;
718 break;
719 case AMDGPUIntrinsic::R600_txq:
720 TextureOp = 7;
721 break;
722 case AMDGPUIntrinsic::R600_ddx:
723 TextureOp = 8;
724 break;
725 case AMDGPUIntrinsic::R600_ddy:
726 TextureOp = 9;
727 break;
Vincent Lejeune6df39432013-10-02 16:00:33 +0000728 case AMDGPUIntrinsic::R600_ldptr:
729 TextureOp = 10;
730 break;
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000731 default:
732 llvm_unreachable("Unknow Texture Operation");
733 }
734
735 SDValue TexArgs[19] = {
736 DAG.getConstant(TextureOp, MVT::i32),
737 Op.getOperand(1),
738 DAG.getConstant(0, MVT::i32),
739 DAG.getConstant(1, MVT::i32),
740 DAG.getConstant(2, MVT::i32),
741 DAG.getConstant(3, MVT::i32),
742 Op.getOperand(2),
743 Op.getOperand(3),
744 Op.getOperand(4),
745 DAG.getConstant(0, MVT::i32),
746 DAG.getConstant(1, MVT::i32),
747 DAG.getConstant(2, MVT::i32),
748 DAG.getConstant(3, MVT::i32),
749 Op.getOperand(5),
750 Op.getOperand(6),
751 Op.getOperand(7),
752 Op.getOperand(8),
753 Op.getOperand(9),
754 Op.getOperand(10)
755 };
Craig Topper48d114b2014-04-26 18:35:24 +0000756 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000757 }
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000758 case AMDGPUIntrinsic::AMDGPU_dp4: {
759 SDValue Args[8] = {
760 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
761 DAG.getConstant(0, MVT::i32)),
762 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
763 DAG.getConstant(0, MVT::i32)),
764 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
765 DAG.getConstant(1, MVT::i32)),
766 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
767 DAG.getConstant(1, MVT::i32)),
768 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
769 DAG.getConstant(2, MVT::i32)),
770 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
771 DAG.getConstant(2, MVT::i32)),
772 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
773 DAG.getConstant(3, MVT::i32)),
774 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
775 DAG.getConstant(3, MVT::i32))
776 };
Craig Topper48d114b2014-04-26 18:35:24 +0000777 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000778 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000779
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000780 case Intrinsic::r600_read_ngroups_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000781 return LowerImplicitParameter(DAG, VT, DL, 0);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000782 case Intrinsic::r600_read_ngroups_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000783 return LowerImplicitParameter(DAG, VT, DL, 1);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000784 case Intrinsic::r600_read_ngroups_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000785 return LowerImplicitParameter(DAG, VT, DL, 2);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000786 case Intrinsic::r600_read_global_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000787 return LowerImplicitParameter(DAG, VT, DL, 3);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000788 case Intrinsic::r600_read_global_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000789 return LowerImplicitParameter(DAG, VT, DL, 4);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000790 case Intrinsic::r600_read_global_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000791 return LowerImplicitParameter(DAG, VT, DL, 5);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000792 case Intrinsic::r600_read_local_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000793 return LowerImplicitParameter(DAG, VT, DL, 6);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000794 case Intrinsic::r600_read_local_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000795 return LowerImplicitParameter(DAG, VT, DL, 7);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000796 case Intrinsic::r600_read_local_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000797 return LowerImplicitParameter(DAG, VT, DL, 8);
798
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000799 case Intrinsic::r600_read_tgid_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000800 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
801 AMDGPU::T1_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000802 case Intrinsic::r600_read_tgid_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000803 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
804 AMDGPU::T1_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000805 case Intrinsic::r600_read_tgid_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000806 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
807 AMDGPU::T1_Z, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000808 case Intrinsic::r600_read_tidig_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000809 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
810 AMDGPU::T0_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000811 case Intrinsic::r600_read_tidig_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000812 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
813 AMDGPU::T0_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000814 case Intrinsic::r600_read_tidig_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000815 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
816 AMDGPU::T0_Z, VT);
Matt Arsenault257d48d2014-06-24 22:13:39 +0000817 case Intrinsic::AMDGPU_rsq:
818 // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
819 return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000820 }
821 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
822 break;
823 }
824 } // end switch(Op.getOpcode())
825 return SDValue();
826}
827
828void R600TargetLowering::ReplaceNodeResults(SDNode *N,
829 SmallVectorImpl<SDValue> &Results,
830 SelectionDAG &DAG) const {
831 switch (N->getOpcode()) {
Matt Arsenaultd125d742014-03-27 17:23:24 +0000832 default:
833 AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
834 return;
Tom Stellard75aadc22012-12-11 21:25:42 +0000835 case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
Tom Stellard365366f2013-01-23 02:09:06 +0000836 return;
837 case ISD::LOAD: {
838 SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
839 Results.push_back(SDValue(Node, 0));
840 Results.push_back(SDValue(Node, 1));
841 // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
842 // function
843 DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
844 return;
845 }
Jan Vesely343cd6f02014-06-22 21:43:01 +0000846 case ISD::STORE: {
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000847 SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
848 Results.push_back(SDValue(Node, 0));
849 return;
Tom Stellard75aadc22012-12-11 21:25:42 +0000850 }
Jan Vesely343cd6f02014-06-22 21:43:01 +0000851 case ISD::UDIV: {
852 SDValue Op = SDValue(N, 0);
853 SDLoc DL(Op);
854 EVT VT = Op.getValueType();
855 SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT),
856 N->getOperand(0), N->getOperand(1));
857 Results.push_back(UDIVREM);
858 break;
859 }
860 case ISD::UREM: {
861 SDValue Op = SDValue(N, 0);
862 SDLoc DL(Op);
863 EVT VT = Op.getValueType();
864 SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT),
865 N->getOperand(0), N->getOperand(1));
866 Results.push_back(UDIVREM.getValue(1));
867 break;
868 }
869 case ISD::SDIV: {
870 SDValue Op = SDValue(N, 0);
871 SDLoc DL(Op);
872 EVT VT = Op.getValueType();
873 SDValue SDIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(VT, VT),
874 N->getOperand(0), N->getOperand(1));
875 Results.push_back(SDIVREM);
876 break;
877 }
878 case ISD::SREM: {
879 SDValue Op = SDValue(N, 0);
880 SDLoc DL(Op);
881 EVT VT = Op.getValueType();
882 SDValue SDIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(VT, VT),
883 N->getOperand(0), N->getOperand(1));
884 Results.push_back(SDIVREM.getValue(1));
885 break;
886 }
887 case ISD::SDIVREM: {
888 SDValue Op = SDValue(N, 1);
889 SDValue RES = LowerSDIVREM(Op, DAG);
890 Results.push_back(RES);
891 Results.push_back(RES.getValue(1));
892 break;
893 }
894 case ISD::UDIVREM: {
895 SDValue Op = SDValue(N, 0);
896 SDLoc DL(Op);
897 EVT VT = Op.getValueType();
898 EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
899
900 SDValue one = DAG.getConstant(1, HalfVT);
901 SDValue zero = DAG.getConstant(0, HalfVT);
902
903 //HiLo split
904 SDValue LHS = N->getOperand(0);
905 SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, zero);
906 SDValue LHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, one);
907
908 SDValue RHS = N->getOperand(1);
909 SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, zero);
910 SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, one);
911
912 // Get Speculative values
913 SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo);
914 SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo);
915
916 SDValue REM_Hi = zero;
917 SDValue REM_Lo = DAG.getSelectCC(DL, RHS_Hi, zero, REM_Part, LHS_Hi, ISD::SETEQ);
918
919 SDValue DIV_Hi = DAG.getSelectCC(DL, RHS_Hi, zero, DIV_Part, zero, ISD::SETEQ);
920 SDValue DIV_Lo = zero;
921
922 const unsigned halfBitWidth = HalfVT.getSizeInBits();
923
924 for (unsigned i = 0; i < halfBitWidth; ++i) {
925 SDValue POS = DAG.getConstant(halfBitWidth - i - 1, HalfVT);
926 // Get Value of high bit
927 SDValue HBit;
928 if (halfBitWidth == 32 && Subtarget->hasBFE()) {
929 HBit = DAG.getNode(AMDGPUISD::BFE_U32, DL, HalfVT, LHS_Lo, POS, one);
930 } else {
931 HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS);
932 HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one);
933 }
934
935 SDValue Carry = DAG.getNode(ISD::SRL, DL, HalfVT, REM_Lo,
936 DAG.getConstant(halfBitWidth - 1, HalfVT));
937 REM_Hi = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Hi, one);
938 REM_Hi = DAG.getNode(ISD::OR, DL, HalfVT, REM_Hi, Carry);
939
940 REM_Lo = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Lo, one);
941 REM_Lo = DAG.getNode(ISD::OR, DL, HalfVT, REM_Lo, HBit);
942
943
944 SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
945
946 SDValue BIT = DAG.getConstant(1 << (halfBitWidth - i - 1), HalfVT);
947 SDValue realBIT = DAG.getSelectCC(DL, REM, RHS, BIT, zero, ISD::SETGE);
948
949 DIV_Lo = DAG.getNode(ISD::OR, DL, HalfVT, DIV_Lo, realBIT);
950
951 // Update REM
952
953 SDValue REM_sub = DAG.getNode(ISD::SUB, DL, VT, REM, RHS);
954
955 REM = DAG.getSelectCC(DL, REM, RHS, REM_sub, REM, ISD::SETGE);
956 REM_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, zero);
957 REM_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, one);
958 }
959
960 SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
961 SDValue DIV = DAG.getNode(ISD::BUILD_PAIR, DL, VT, DIV_Lo, DIV_Hi);
962 Results.push_back(DIV);
963 Results.push_back(REM);
964 break;
965 }
966 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000967}
968
Tom Stellard880a80a2014-06-17 16:53:14 +0000969SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
970 SDValue Vector) const {
971
972 SDLoc DL(Vector);
973 EVT VecVT = Vector.getValueType();
974 EVT EltVT = VecVT.getVectorElementType();
975 SmallVector<SDValue, 8> Args;
976
977 for (unsigned i = 0, e = VecVT.getVectorNumElements();
978 i != e; ++i) {
979 Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
980 Vector, DAG.getConstant(i, getVectorIdxTy())));
981 }
982
983 return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
984}
985
986SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
987 SelectionDAG &DAG) const {
988
989 SDLoc DL(Op);
990 SDValue Vector = Op.getOperand(0);
991 SDValue Index = Op.getOperand(1);
992
993 if (isa<ConstantSDNode>(Index) ||
994 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
995 return Op;
996
997 Vector = vectorToVerticalVector(DAG, Vector);
998 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
999 Vector, Index);
1000}
1001
1002SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
1003 SelectionDAG &DAG) const {
1004 SDLoc DL(Op);
1005 SDValue Vector = Op.getOperand(0);
1006 SDValue Value = Op.getOperand(1);
1007 SDValue Index = Op.getOperand(2);
1008
1009 if (isa<ConstantSDNode>(Index) ||
1010 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
1011 return Op;
1012
1013 Vector = vectorToVerticalVector(DAG, Vector);
1014 SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
1015 Vector, Value, Index);
1016 return vectorToVerticalVector(DAG, Insert);
1017}
1018
Vincent Lejeuneb55940c2013-07-09 15:03:11 +00001019SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
1020 // On hw >= R700, COS/SIN input must be between -1. and 1.
1021 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
1022 EVT VT = Op.getValueType();
1023 SDValue Arg = Op.getOperand(0);
1024 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
1025 DAG.getNode(ISD::FADD, SDLoc(Op), VT,
1026 DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
1027 DAG.getConstantFP(0.15915494309, MVT::f32)),
1028 DAG.getConstantFP(0.5, MVT::f32)));
1029 unsigned TrigNode;
1030 switch (Op.getOpcode()) {
1031 case ISD::FCOS:
1032 TrigNode = AMDGPUISD::COS_HW;
1033 break;
1034 case ISD::FSIN:
1035 TrigNode = AMDGPUISD::SIN_HW;
1036 break;
1037 default:
1038 llvm_unreachable("Wrong trig opcode");
1039 }
1040 SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
1041 DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
1042 DAG.getConstantFP(-0.5, MVT::f32)));
1043 if (Gen >= AMDGPUSubtarget::R700)
1044 return TrigVal;
1045 // On R600 hw, COS/SIN input must be between -Pi and Pi.
1046 return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
1047 DAG.getConstantFP(3.14159265359, MVT::f32));
1048}
1049
Jan Vesely25f36272014-06-18 12:27:13 +00001050SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
1051 SDLoc DL(Op);
1052 EVT VT = Op.getValueType();
1053
1054 SDValue Lo = Op.getOperand(0);
1055 SDValue Hi = Op.getOperand(1);
1056 SDValue Shift = Op.getOperand(2);
1057 SDValue Zero = DAG.getConstant(0, VT);
1058 SDValue One = DAG.getConstant(1, VT);
1059
1060 SDValue Width = DAG.getConstant(VT.getSizeInBits(), VT);
1061 SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT);
1062 SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
1063 SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
1064
1065 // The dance around Width1 is necessary for 0 special case.
1066 // Without it the CompShift might be 32, producing incorrect results in
1067 // Overflow. So we do the shift in two steps, the alternative is to
1068 // add a conditional to filter the special case.
1069
1070 SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
1071 Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);
1072
1073 SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
1074 HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
1075 SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);
1076
1077 SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
1078 SDValue LoBig = Zero;
1079
1080 Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
1081 Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
1082
1083 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
1084}
1085
Jan Vesely900ff2e2014-06-18 12:27:15 +00001086SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
1087 SDLoc DL(Op);
1088 EVT VT = Op.getValueType();
1089
1090 SDValue Lo = Op.getOperand(0);
1091 SDValue Hi = Op.getOperand(1);
1092 SDValue Shift = Op.getOperand(2);
1093 SDValue Zero = DAG.getConstant(0, VT);
1094 SDValue One = DAG.getConstant(1, VT);
1095
Jan Veselyecf51332014-06-18 12:27:17 +00001096 const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;
1097
Jan Vesely900ff2e2014-06-18 12:27:15 +00001098 SDValue Width = DAG.getConstant(VT.getSizeInBits(), VT);
1099 SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT);
1100 SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
1101 SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
1102
1103 // The dance around Width1 is necessary for 0 special case.
1104 // Without it the CompShift might be 32, producing incorrect results in
1105 // Overflow. So we do the shift in two steps, the alternative is to
1106 // add a conditional to filter the special case.
1107
1108 SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
1109 Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);
1110
Jan Veselyecf51332014-06-18 12:27:17 +00001111 SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
Jan Vesely900ff2e2014-06-18 12:27:15 +00001112 SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
1113 LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);
1114
Jan Veselyecf51332014-06-18 12:27:17 +00001115 SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
1116 SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;
Jan Vesely900ff2e2014-06-18 12:27:15 +00001117
1118 Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
1119 Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
1120
1121 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
1122}
1123
Tom Stellard75aadc22012-12-11 21:25:42 +00001124SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
1125 return DAG.getNode(
1126 ISD::SETCC,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001127 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +00001128 MVT::i1,
1129 Op, DAG.getConstantFP(0.0f, MVT::f32),
1130 DAG.getCondCode(ISD::SETNE)
1131 );
1132}
1133
Tom Stellard75aadc22012-12-11 21:25:42 +00001134SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001135 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001136 unsigned DwordOffset) const {
1137 unsigned ByteOffset = DwordOffset * 4;
1138 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +00001139 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +00001140
1141 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
1142 assert(isInt<16>(ByteOffset));
1143
1144 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
1145 DAG.getConstant(ByteOffset, MVT::i32), // PTR
1146 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
1147 false, false, false, 0);
1148}
1149
Tom Stellard75aadc22012-12-11 21:25:42 +00001150bool R600TargetLowering::isZero(SDValue Op) const {
1151 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
1152 return Cst->isNullValue();
1153 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
1154 return CstFP->isZero();
1155 } else {
1156 return false;
1157 }
1158}
1159
/// Custom-lower SELECT_CC so it can be matched by the hardware SET* and CND*
/// instruction families; falls back to a pair of SELECT_CC nodes otherwise.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand: if the operands
  // arrive as (False, True), invert the condition code (and, if the inverse
  // isn't legal, additionally swap LHS/RHS with the swapped-inverse code).
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
     ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        // Inverting the condition requires swapping True/False as well.
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    // CND* has no "not equal" form; rewrite NE-style codes as the inverted
    // comparison with True/False exchanged.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
1294
Alp Tokercb402912014-01-24 17:20:08 +00001295/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001296/// convert these pointers to a register index. Each register holds
1297/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
1298/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
1299/// for indirect addressing.
1300SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1301 unsigned StackWidth,
1302 SelectionDAG &DAG) const {
1303 unsigned SRLPad;
1304 switch(StackWidth) {
1305 case 1:
1306 SRLPad = 2;
1307 break;
1308 case 2:
1309 SRLPad = 3;
1310 break;
1311 case 4:
1312 SRLPad = 4;
1313 break;
1314 default: llvm_unreachable("Invalid stack width");
1315 }
1316
Andrew Trickef9de2a2013-05-25 02:42:55 +00001317 return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001318 DAG.getConstant(SRLPad, MVT::i32));
1319}
1320
1321void R600TargetLowering::getStackAddress(unsigned StackWidth,
1322 unsigned ElemIdx,
1323 unsigned &Channel,
1324 unsigned &PtrIncr) const {
1325 switch (StackWidth) {
1326 default:
1327 case 1:
1328 Channel = 0;
1329 if (ElemIdx > 0) {
1330 PtrIncr = 1;
1331 } else {
1332 PtrIncr = 0;
1333 }
1334 break;
1335 case 2:
1336 Channel = ElemIdx % 2;
1337 if (ElemIdx == 2) {
1338 PtrIncr = 1;
1339 } else {
1340 PtrIncr = 0;
1341 }
1342 break;
1343 case 4:
1344 Channel = ElemIdx;
1345 PtrIncr = 0;
1346 break;
1347 }
1348}
1349
/// Custom-lower STORE nodes.
///
/// Handles, in order: stores the AMDGPU base class can lower; global-address
/// truncating stores (emitted as a masked-OR intrinsic) and global dword
/// stores (byte address converted to a dword address); and finally
/// private-address stores, lowered to REGISTER_STORE nodes for indirect
/// addressing. Non-private stores not matched above return SDValue().
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Ptr = Op.getOperand(2);

  // Give the generic AMDGPU lowering the first chance.
  SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
  if (Result.getNode()) {
    return Result;
  }

  if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
    if (StoreNode->isTruncatingStore()) {
      // Sub-dword global store: emit a masked-OR (STORE_MSKOR) that merges
      // the shifted value into the containing dword.
      EVT VT = Value.getValueType();
      assert(VT.bitsLE(MVT::i32));
      EVT MemVT = StoreNode->getMemoryVT();
      SDValue MaskConstant;
      if (MemVT == MVT::i8) {
        MaskConstant = DAG.getConstant(0xFF, MVT::i32);
      } else {
        assert(MemVT == MVT::i16);
        MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
      }
      // Split the byte address into the dword address and the byte offset
      // within that dword.
      SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
                                      DAG.getConstant(2, MVT::i32));
      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
                                      DAG.getConstant(0x00000003, VT));
      SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
      // Bit position within the dword = byte offset * 8.
      SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
                                  DAG.getConstant(3, VT));
      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
      // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
      // vector instead.
      SDValue Src[4] = {
        ShiftedValue,
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(0, MVT::i32),
        Mask
      };
      SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src);
      SDValue Args[3] = { Chain, Input, DWordAddr };
      return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
                                     Op->getVTList(), Args, MemVT,
                                     StoreNode->getMemOperand());
    } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
               Value.getValueType().bitsGE(MVT::i32)) {
      // Convert pointer from byte address to dword address.
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
                        DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
                                    Ptr, DAG.getConstant(2, MVT::i32)));

      if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
        llvm_unreachable("Truncated and indexed stores not supported yet");
      } else {
        Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
      }
      return Chain;
    }
  }

  EVT ValueVT = Value.getValueType();

  // Only private-address stores are handled past this point.
  if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // NOTE(review): this repeats the base-class call made at the top of the
  // function with identical arguments — it appears redundant; confirm before
  // removing.
  SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
  if (Ret.getNode()) {
    return Ret;
  }
  // Lowering for indirect addressing

  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  // Convert the byte-addressed pointer to a register index.
  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (ValueVT.isVector()) {
    // Store each element in its own channel/register slot and tie the
    // per-element stores together with a TokenFactor.
    unsigned NumElemVT = ValueVT.getVectorNumElements();
    EVT ElemVT = ValueVT.getVectorElementType();
    SmallVector<SDValue, 4> Stores(NumElemVT);

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
                                 Value, DAG.getConstant(i, MVT::i32));

      Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
                              Chain, Elem, Ptr,
                              DAG.getTargetConstant(Channel, MVT::i32));
    }
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
  } else {
    // Scalar store; widen i8 to i32 first.
    if (ValueVT == MVT::i8) {
      Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
    }
    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
        DAG.getTargetConstant(0, MVT::i32)); // Channel
  }

  return Chain;
}
1462
/// Map a constant-buffer address space to the base address of its block:
/// 512 + (kc_bank << 12), i.e. 512 + 4096 * bank index.
/// Returns -1 for address spaces that are not constant buffers.
static int
ConstantAddressBlock(unsigned AddressSpace) {
  switch (AddressSpace) {
  case AMDGPUAS::CONSTANT_BUFFER_0:
    return 512;
  case AMDGPUAS::CONSTANT_BUFFER_1:
    return 512 + 4096;
  case AMDGPUAS::CONSTANT_BUFFER_2:
    return 512 + 4096 * 2;
  case AMDGPUAS::CONSTANT_BUFFER_3:
    return 512 + 4096 * 3;
  case AMDGPUAS::CONSTANT_BUFFER_4:
    return 512 + 4096 * 4;
  case AMDGPUAS::CONSTANT_BUFFER_5:
    return 512 + 4096 * 5;
  case AMDGPUAS::CONSTANT_BUFFER_6:
    return 512 + 4096 * 6;
  case AMDGPUAS::CONSTANT_BUFFER_7:
    return 512 + 4096 * 7;
  case AMDGPUAS::CONSTANT_BUFFER_8:
    return 512 + 4096 * 8;
  case AMDGPUAS::CONSTANT_BUFFER_9:
    return 512 + 4096 * 9;
  case AMDGPUAS::CONSTANT_BUFFER_10:
    return 512 + 4096 * 10;
  case AMDGPUAS::CONSTANT_BUFFER_11:
    return 512 + 4096 * 11;
  case AMDGPUAS::CONSTANT_BUFFER_12:
    return 512 + 4096 * 12;
  case AMDGPUAS::CONSTANT_BUFFER_13:
    return 512 + 4096 * 13;
  case AMDGPUAS::CONSTANT_BUFFER_14:
    return 512 + 4096 * 14;
  case AMDGPUAS::CONSTANT_BUFFER_15:
    return 512 + 4096 * 15;
  default:
    return -1;
  }
}
1503
/// Custom lowering for ISD::LOAD on R600.
///
/// Handles, in order:
///   1. Loads the generic AMDGPU lowering already knows how to handle.
///   2. Vector loads from local memory (split into smaller loads).
///   3. Loads from the constant buffers (folded to CONST_ADDRESS nodes).
///   4. SEXT loads outside CONSTANT_BUFFER_0 (expanded to zext-load + shifts).
///   5. Private-address loads (lowered to REGISTER_LOAD indirect addressing).
/// Returns a merge of the loaded value and the chain.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  // First give the generic AMDGPU lowering a chance to handle this load.
  SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
  if (Ret.getNode()) {
    SDValue Ops[2] = {
      Ret,
      Chain
    };
    return DAG.getMergeValues(Ops, DL);
  }


  // Vector loads from local memory are split into smaller loads.
  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      SplitVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Constant-buffer loads: only non-extending and zero-extending loads can
  // be folded to CONST_ADDRESS here (sign-extension is handled below).
  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    // A compile-time-known pointer lets us emit one CONST_ADDRESS per
    // 32-bit channel; otherwise the whole 128-bit slot must be fetched.
    if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
        isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      // Scalar loads still build a vector here; a single element is
      // extracted below before returning.
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT,
                           makeArrayRef(Slots, NumElements));
    } else {
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    // For a scalar load, take channel 0 of the fetched slot.
    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
          DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    // Expand sext load as (sra (shl (extload x))) so the sign bit of the
    // narrow value ends up replicated into the high bits.
    SDValue ShiftAmount =
        DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                  LoadNode->getPointerInfo(), MemVT,
                                  LoadNode->isVolatile(),
                                  LoadNode->isNonTemporal(),
                                  LoadNode->getAlignment());
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);

    SDValue MergedValues[2] = { Sra, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Everything past this point is the private (indirect-addressed stack)
  // address space; any other load is left to the legalizer.
  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  // Convert the byte-offset stack pointer into a register index.
  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    // One REGISTER_LOAD per element; getStackAddress maps the element index
    // to a channel and a pointer increment for the configured stack width.
    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, MVT::i32),
                             Op.getOperand(2));
    }
    // Pad unused lanes with undef so we can always build a 4-wide vector.
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2] = {
    LoweredLoad,
    Chain
  };

  return DAG.getMergeValues(Ops, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001652
Matt Arsenault1d555c42014-06-23 18:00:55 +00001653SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1654 SDValue Chain = Op.getOperand(0);
1655 SDValue Cond = Op.getOperand(1);
1656 SDValue Jump = Op.getOperand(2);
1657
1658 return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1659 Chain, Jump, Cond);
1660}
1661
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
///
/// Compute-shader arguments are loaded from CONSTANT_BUFFER_0 (offset by the
/// 36-byte implicit header); other shader types receive their inputs in
/// 128-bit registers assigned by the calling convention.
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      SDLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned ShaderType = MF.getInfo<R600MachineFunctionInfo>()->ShaderType;

  // LocalIns holds the arguments as they appear in the original IR function
  // (before legalization split/merged them), so its types describe memory.
  SmallVector<ISD::InputArg, 8> LocalIns;

  getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);

  AnalyzeFormalArguments(CCInfo, LocalIns);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    EVT VT = Ins[i].VT;          // Legalized (register) type.
    EVT MemVT = LocalIns[i].VT;  // Original (in-memory) type.

    // Non-compute shaders: arguments live in pre-assigned 128-bit registers.
    if (ShaderType != ShaderType::COMPUTE) {
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    // Compute shaders: each argument is read from constant buffer 0.
    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
    // being invalid. Somehow this seems to work with i64 arguments, but breaks
    // for <1 x i64>.

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.

    // FIXME: This should really check the extload type, but the handling of
    // extload vector parameters seems to be broken.
    //ISD::LoadExtType Ext = Ins[i].Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
    ISD::LoadExtType Ext = ISD::SEXTLOAD;
    SDValue Arg = DAG.getExtLoad(Ext, DL, VT, Chain,
                                 DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
                                 MachinePointerInfo(UndefValue::get(PtrTy)),
                                 MemVT, false, false, 4);

    // 4 is the preferred alignment for the CONSTANT memory space.
    InVals.push_back(Arg);
  }
  return Chain;
}
1721
Matt Arsenault758659232013-05-18 00:21:46 +00001722EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001723 if (!VT.isVector())
1724 return MVT::i32;
Tom Stellard75aadc22012-12-11 21:25:42 +00001725 return VT.changeVectorElementTypeToInteger();
1726}
1727
Matt Arsenault209a7b92014-04-18 07:40:20 +00001728static SDValue CompactSwizzlableVector(
1729 SelectionDAG &DAG, SDValue VectorEntry,
1730 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001731 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1732 assert(RemapSwizzle.empty());
1733 SDValue NewBldVec[4] = {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001734 VectorEntry.getOperand(0),
1735 VectorEntry.getOperand(1),
1736 VectorEntry.getOperand(2),
1737 VectorEntry.getOperand(3)
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001738 };
1739
1740 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeunefa58a5f2013-10-13 17:56:10 +00001741 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1742 // We mask write here to teach later passes that the ith element of this
1743 // vector is undef. Thus we can use it to reduce 128 bits reg usage,
1744 // break false dependencies and additionnaly make assembly easier to read.
1745 RemapSwizzle[i] = 7; // SEL_MASK_WRITE
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001746 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1747 if (C->isZero()) {
1748 RemapSwizzle[i] = 4; // SEL_0
1749 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1750 } else if (C->isExactlyValue(1.0)) {
1751 RemapSwizzle[i] = 5; // SEL_1
1752 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1753 }
1754 }
1755
1756 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1757 continue;
1758 for (unsigned j = 0; j < i; j++) {
1759 if (NewBldVec[i] == NewBldVec[j]) {
1760 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1761 RemapSwizzle[i] = j;
1762 break;
1763 }
1764 }
1765 }
1766
1767 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001768 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001769}
1770
Benjamin Kramer193960c2013-06-11 13:32:25 +00001771static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1772 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001773 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1774 assert(RemapSwizzle.empty());
1775 SDValue NewBldVec[4] = {
1776 VectorEntry.getOperand(0),
1777 VectorEntry.getOperand(1),
1778 VectorEntry.getOperand(2),
1779 VectorEntry.getOperand(3)
1780 };
1781 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001782 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001783 RemapSwizzle[i] = i;
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001784 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1785 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1786 ->getZExtValue();
1787 if (i == Idx)
1788 isUnmovable[Idx] = true;
1789 }
1790 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001791
1792 for (unsigned i = 0; i < 4; i++) {
1793 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1794 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1795 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001796 if (isUnmovable[Idx])
1797 continue;
1798 // Swap i and Idx
1799 std::swap(NewBldVec[Idx], NewBldVec[i]);
1800 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1801 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001802 }
1803 }
1804
1805 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001806 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001807}
1808
1809
1810SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1811SDValue Swz[4], SelectionDAG &DAG) const {
1812 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1813 // Old -> New swizzle values
1814 DenseMap<unsigned, unsigned> SwizzleRemap;
1815
1816 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1817 for (unsigned i = 0; i < 4; i++) {
1818 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1819 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1820 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1821 }
1822
1823 SwizzleRemap.clear();
1824 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1825 for (unsigned i = 0; i < 4; i++) {
1826 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1827 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1828 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1829 }
1830
1831 return BuildVector;
1832}
1833
1834
Tom Stellard75aadc22012-12-11 21:25:42 +00001835//===----------------------------------------------------------------------===//
1836// Custom DAG Optimizations
1837//===----------------------------------------------------------------------===//
1838
/// R600-specific DAG combines, applied on top of the generic AMDGPU ones
/// (the default case and the final return both delegate to the base class).
SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
  case ISD::FP_ROUND: {
      SDValue Arg = N->getOperand(0);
      if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
        return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
                           Arg.getOperand(0));
      }
      break;
    }

  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
  // (i32 select_cc f32, f32, -1, 0 cc)
  //
  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
  // this to one of the SET*_DX10 instructions.
  case ISD::FP_TO_SINT: {
    SDValue FNeg = N->getOperand(0);
    if (FNeg.getOpcode() != ISD::FNEG) {
      return SDValue();
    }
    SDValue SelectCC = FNeg.getOperand(0);
    if (SelectCC.getOpcode() != ISD::SELECT_CC ||
        SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
        SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
        !isHWTrueValue(SelectCC.getOperand(2)) ||
        !isHWFalseValue(SelectCC.getOperand(3))) {
      return SDValue();
    }

    return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
                           SelectCC.getOperand(0), // LHS
                           SelectCC.getOperand(1), // RHS
                           DAG.getConstant(-1, MVT::i32), // True
                           DAG.getConstant(0, MVT::i32), // False
                           SelectCC.getOperand(4)); // CC

    break; // Unreachable: both paths above return.
  }

  // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
  // => build_vector elt0, ... , NewEltIdx, ... , eltN
  case ISD::INSERT_VECTOR_ELT: {
    SDValue InVec = N->getOperand(0);
    SDValue InVal = N->getOperand(1);
    SDValue EltNo = N->getOperand(2);
    SDLoc dl(N);

    // If the inserted element is an UNDEF, just use the input vector.
    if (InVal.getOpcode() == ISD::UNDEF)
      return InVec;

    EVT VT = InVec.getValueType();

    // If we can't generate a legal BUILD_VECTOR, exit
    if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
      return SDValue();

    // Check that we know which element is being inserted
    if (!isa<ConstantSDNode>(EltNo))
      return SDValue();
    unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

    // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
    // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
    // vector elements.
    SmallVector<SDValue, 8> Ops;
    if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
      Ops.append(InVec.getNode()->op_begin(),
                 InVec.getNode()->op_end());
    } else if (InVec.getOpcode() == ISD::UNDEF) {
      unsigned NElts = VT.getVectorNumElements();
      Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
    } else {
      return SDValue();
    }

    // Insert the element
    if (Elt < Ops.size()) {
      // All the operands of BUILD_VECTOR must have the same type;
      // we enforce that here.
      EVT OpVT = Ops[0].getValueType();
      if (InVal.getValueType() != OpVT)
        InVal = OpVT.bitsGT(InVal.getValueType()) ?
          DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
          DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
      Ops[Elt] = InVal;
    }

    // Return the new vector
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
  }

  // Extract_vec (Build_vector) generated by custom lowering
  // also needs to be customly combined
  case ISD::EXTRACT_VECTOR_ELT: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return Arg->getOperand(Element);
      }
    }
    if (Arg.getOpcode() == ISD::BITCAST &&
        Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
            Arg->getOperand(0).getOperand(Element));
      }
    }
    // NOTE(review): no break here, so this case falls through into
    // ISD::SELECT_CC below. As written this is benign: the SELECT_CC case
    // first runs the generic AMDGPU combine (same as the function's final
    // return) and then bails out because an EXTRACT_VECTOR_ELT node fails
    // the LHS opcode check. An explicit break would make the intent clearer
    // — confirm before changing.
  }

  case ISD::SELECT_CC: {
    // Try common optimizations
    SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
    if (Ret.getNode())
      return Ret;

    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
    //      selectcc x, y, a, b, inv(cc)
    //
    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
    //      selectcc x, y, a, b, cc
    SDValue LHS = N->getOperand(0);
    if (LHS.getOpcode() != ISD::SELECT_CC) {
      return SDValue();
    }

    SDValue RHS = N->getOperand(1);
    SDValue True = N->getOperand(2);
    SDValue False = N->getOperand(3);
    ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();

    // The fold only applies when the outer select reuses the inner select's
    // true/false values and compares against the inner false value.
    if (LHS.getOperand(2).getNode() != True.getNode() ||
        LHS.getOperand(3).getNode() != False.getNode() ||
        RHS.getNode() != False.getNode()) {
      return SDValue();
    }

    switch (NCC) {
    default: return SDValue();
    case ISD::SETNE: return LHS;
    case ISD::SETEQ: {
      ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
      LHSCC = ISD::getSetCCInverse(LHSCC,
                                  LHS.getOperand(0).getValueType().isInteger());
      // Only emit the inverted compare if it is (or may still become) legal.
      if (DCI.isBeforeLegalizeOps() ||
          isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
        return DAG.getSelectCC(SDLoc(N),
                               LHS.getOperand(0),
                               LHS.getOperand(1),
                               LHS.getOperand(2),
                               LHS.getOperand(3),
                               LHSCC);
      break;
    }
    }
    return SDValue();
  }

  // Rewrite an EXPORT of a BUILD_VECTOR with an optimized swizzle.
  case AMDGPUISD::EXPORT: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    SDValue NewArgs[8] = {
      N->getOperand(0), // Chain
      SDValue(),        // Filled in below by OptimizeSwizzle.
      N->getOperand(2), // ArrayBase
      N->getOperand(3), // Type
      N->getOperand(4), // SWZ_X
      N->getOperand(5), // SWZ_Y
      N->getOperand(6), // SWZ_Z
      N->getOperand(7) // SWZ_W
    };
    SDLoc DL(N);
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
    return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
  }
  // Same swizzle optimization for texture fetches (operands 2..5 are the
  // swizzle selects; the rest are copied through unchanged).
  case AMDGPUISD::TEXTURE_FETCH: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    SDValue NewArgs[19] = {
      N->getOperand(0),
      N->getOperand(1),
      N->getOperand(2),
      N->getOperand(3),
      N->getOperand(4),
      N->getOperand(5),
      N->getOperand(6),
      N->getOperand(7),
      N->getOperand(8),
      N->getOperand(9),
      N->getOperand(10),
      N->getOperand(11),
      N->getOperand(12),
      N->getOperand(13),
      N->getOperand(14),
      N->getOperand(15),
      N->getOperand(16),
      N->getOperand(17),
      N->getOperand(18),
    };
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
    return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
        NewArgs);
  }
  }

  return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002058
/// Try to fold the machine node producing \p Src directly into operand
/// \p SrcIdx of \p ParentNode, rewriting the operand's modifier slots:
///  - FNEG/FABS fold into the neg/abs bits (\p Neg / \p Abs),
///  - CONST_COPY folds into an ALU_CONST register read (\p Sel), subject to
///    the per-instruction constant-read limits,
///  - MOV_IMM_* folds into an inline constant register or the single
///    ALU_LITERAL_X slot (\p Imm).
/// The SDValue reference parameters are both inputs (a null node means the
/// corresponding slot is unavailable) and outputs (updated on success).
/// Returns true if Src was replaced and the modifier operands updated.
/// NOTE(review): \p SrcIdx is currently unused in the body — confirm.
static bool
FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
            SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
  if (!Src.isMachineOpcode())
    return false;
  switch (Src.getMachineOpcode()) {
  case AMDGPU::FNEG_R600:
    // Fold the negate into the neg modifier bit, if this operand has one.
    if (!Neg.getNode())
      return false;
    Src = Src.getOperand(0);
    Neg = DAG.getTargetConstant(1, MVT::i32);
    return true;
  case AMDGPU::FABS_R600:
    // Fold the absolute value into the abs modifier bit, if available.
    if (!Abs.getNode())
      return false;
    Src = Src.getOperand(0);
    Abs = DAG.getTargetConstant(1, MVT::i32);
    return true;
  case AMDGPU::CONST_COPY: {
    unsigned Opcode = ParentNode->getMachineOpcode();
    bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;

    if (!Sel.getNode())
      return false;

    SDValue CstOffset = Src.getOperand(0);
    if (ParentNode->getValueType(0).isVector())
      return false;

    // Gather constants values
    int SrcIndices[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
    };
    std::vector<unsigned> Consts;
    for (int OtherSrcIdx : SrcIndices) {
      int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
      if (OtherSrcIdx < 0 || OtherSelIdx < 0)
        continue;
      if (HasDst) {
        // Operand indices from getOperandIdx count the def; SDNode operands
        // do not, so shift both indices down by one.
        OtherSrcIdx--;
        OtherSelIdx--;
      }
      if (RegisterSDNode *Reg =
          dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
        if (Reg->getReg() == AMDGPU::ALU_CONST) {
          ConstantSDNode *Cst
            = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
          Consts.push_back(Cst->getZExtValue());
        }
      }
    }

    ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
    Consts.push_back(Cst->getZExtValue());
    // Bail out if adding this constant would exceed the number of distinct
    // constants the instruction group may read.
    if (!TII->fitsConstReadLimitations(Consts)) {
      return false;
    }

    Sel = CstOffset;
    Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
    return true;
  }
  case AMDGPU::MOV_IMM_I32:
  case AMDGPU::MOV_IMM_F32: {
    unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
    uint64_t ImmValue = 0;


    if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
      // NOTE(review): unchecked dyn_cast dereference — the MOV_IMM_F32
      // operand is presumably always a ConstantFPSDNode; cast<> would
      // assert that explicitly. Confirm before changing.
      ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
      float FloatValue = FPC->getValueAPF().convertToFloat();
      // 0.0, 0.5 and 1.0 have dedicated inline-constant registers.
      if (FloatValue == 0.0) {
        ImmReg = AMDGPU::ZERO;
      } else if (FloatValue == 0.5) {
        ImmReg = AMDGPU::HALF;
      } else if (FloatValue == 1.0) {
        ImmReg = AMDGPU::ONE;
      } else {
        ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
      }
    } else {
      // NOTE(review): same unchecked dyn_cast pattern as above.
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
      uint64_t Value = C->getZExtValue();
      if (Value == 0) {
        ImmReg = AMDGPU::ZERO;
      } else if (Value == 1) {
        ImmReg = AMDGPU::ONE_INT;
      } else {
        ImmValue = Value;
      }
    }

    // Check that we aren't already using an immediate.
    // XXX: It's possible for an instruction to have more than one
    // immediate operand, but this is not supported yet.
    if (ImmReg == AMDGPU::ALU_LITERAL_X) {
      if (!Imm.getNode())
        return false;
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
      assert(C);
      // A nonzero existing literal means the slot is already taken.
      if (C->getZExtValue())
        return false;
      Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
    }
    Src = DAG.getRegister(ImmReg, MVT::i32);
    return true;
  }
  default:
    return false;
  }
}
2182
2183
/// \brief Fold the instructions after selecting them.
///
/// Post-instruction-selection peephole.  For machine nodes that can take
/// them, this tries to fold constant source operands via FoldOperand()
/// (defined earlier in this file), which on success rewrites the Src (and
/// possibly Sel/Imm) entries of \p Ops in place — e.g. replacing a constant
/// with an inline constant register such as ZERO/ONE/HALF or a literal —
/// and returns true.  Four node shapes are handled:
///   - DOT_4: eight per-channel sources (src0/src1 for each of X,Y,Z,W),
///     each with its own neg and abs modifier operands;
///   - REG_SEQUENCE: every second operand (the register values) may fold;
///   - CLAMP_R600: folded away by setting the clamp modifier bit on the
///     machine instruction that produces its input;
///   - any other opcode with instruction modifiers (src0/src1/src2).
///
/// \returns a freshly built machine node when a fold succeeded, otherwise
/// the original \p Node unchanged.
SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
                                            SelectionDAG &DAG) const {
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
  // Only machine nodes (post-ISel) are candidates.
  if (!Node->isMachineOpcode())
    return Node;
  unsigned Opcode = Node->getMachineOpcode();
  // Dummy SDValue passed to FoldOperand for modifier slots this opcode
  // does not have.
  SDValue FakeOp;

  // Work on a mutable copy of the node's operand list; FoldOperand mutates
  // entries of this vector through the references taken below.
  std::vector<SDValue> Ops;
  for(SDNode::op_iterator I = Node->op_begin(), E = Node->op_end();
      I != E; ++I)
          Ops.push_back(*I);

  if (Opcode == AMDGPU::DOT_4) {
    // Eight sources: src0 and src1 for each of the four channels X,Y,Z,W.
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
    };
    // Per-source negate modifier operands, same order as OperandIdx.
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
    };
    // Per-source absolute-value modifier operands, same order.
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
    };
    for (unsigned i = 0; i < 8; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      // NOTE(review): the "- 1" offsets below appear to translate a
      // MachineInstr operand index (which counts the dst) into an SDNode
      // operand index (which does not) — confirm against R600InstrInfo.
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue &Abs = Ops[AbsIdx[i] - 1];
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      if (HasDst)
        SelIdx--;
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      // DOT_4 has no literal slot here, so pass FakeOp for Imm.
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::REG_SEQUENCE) {
    // REG_SEQUENCE operands alternate (value, subreg-index) starting at 1;
    // only the value operands are fold candidates.
    for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
      SDValue &Src = Ops[i];
      if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::CLAMP_R600) {
    // Fold the CLAMP pseudo into the producing instruction's clamp bit,
    // when that instruction supports modifiers and has a clamp operand.
    SDValue Src = Node->getOperand(0);
    if (!Src.isMachineOpcode() ||
        !TII->hasInstrModifiers(Src.getMachineOpcode()))
      return Node;
    int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
        AMDGPU::OpName::clamp);
    if (ClampIdx < 0)
      return Node;
    // Rebuild the producer with its clamp operand forced to 1.
    std::vector<SDValue> Ops;
    unsigned NumOp = Src.getNumOperands();
    for(unsigned i = 0; i < NumOp; ++i)
          Ops.push_back(Src.getOperand(i));
    // NOTE(review): "- 1" again converts the MI operand index (counts dst)
    // to an SDNode operand index — confirm.
    Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
    return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
        Node->getVTList(), Ops);
  } else {
    // Generic ALU instruction: up to three sources (src0/src1/src2).
    if (!TII->hasInstrModifiers(Opcode))
      return Node;
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
    };
    // src2 has no abs modifier, hence the trailing -1 sentinel.
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
      -1
    };
    for (unsigned i = 0; i < 3; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue FakeAbs;
      SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
      if (HasDst) {
        // Skip the dst operand when indexing into the SDNode operand list.
        SelIdx--;
        ImmIdx--;
      }
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      SDValue &Imm = Ops[ImmIdx];
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  }

  // Nothing folded.
  return Node;
}