blob: 7f3560a4eba8a09a8557a0473e9e98f2c86a44f7 [file] [log] [blame]
//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation ----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000016#include "AMDGPUFrameLowering.h"
Matt Arsenaultc791f392014-06-23 18:00:31 +000017#include "AMDGPUIntrinsicInfo.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000018#include "AMDGPUSubtarget.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000019#include "R600Defines.h"
20#include "R600InstrInfo.h"
21#include "R600MachineFunctionInfo.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000022#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000023#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000024#include "llvm/CodeGen/MachineInstrBuilder.h"
25#include "llvm/CodeGen/MachineRegisterInfo.h"
26#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000027#include "llvm/IR/Argument.h"
28#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000029
30using namespace llvm;
31
32R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000033 AMDGPUTargetLowering(TM),
34 Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000035 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
36 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
37 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
38 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000039 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
40 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
41
Tom Stellard75aadc22012-12-11 21:25:42 +000042 computeRegisterProperties();
43
Tom Stellard0351ea22013-09-28 02:50:50 +000044 // Set condition code actions
45 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
46 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000047 setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000048 setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000049 setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
50 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000051 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
52 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
53 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
54 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000055 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
56 setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
57
58 setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
59 setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
60 setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
61 setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
62
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000063 setOperationAction(ISD::FCOS, MVT::f32, Custom);
64 setOperationAction(ISD::FSIN, MVT::f32, Custom);
65
Tom Stellard75aadc22012-12-11 21:25:42 +000066 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000067 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000068
Tom Stellard492ebea2013-03-08 15:37:07 +000069 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
70 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Matt Arsenault1d555c42014-06-23 18:00:55 +000071 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000072
73 setOperationAction(ISD::FSUB, MVT::f32, Expand);
74
75 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
76 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
77 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000078
Tom Stellard75aadc22012-12-11 21:25:42 +000079 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
80 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
81
Tom Stellarde8f9f282013-03-08 15:37:05 +000082 setOperationAction(ISD::SETCC, MVT::i32, Expand);
83 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000084 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
85
Tom Stellard53f2f902013-09-05 18:38:03 +000086 setOperationAction(ISD::SELECT, MVT::i32, Expand);
87 setOperationAction(ISD::SELECT, MVT::f32, Expand);
88 setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
Tom Stellard53f2f902013-09-05 18:38:03 +000089 setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000090
Matt Arsenault4e466652014-04-16 01:41:30 +000091 // Expand sign extension of vectors
92 if (!Subtarget->hasBFE())
93 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
94
95 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
96 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);
97
98 if (!Subtarget->hasBFE())
99 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
100 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
101 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);
102
103 if (!Subtarget->hasBFE())
104 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
105 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
106 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);
107
108 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
109 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
110 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);
111
112 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
113
114
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000115 // Legalize loads and stores to the private address space.
116 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000117 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000118 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000119
120 // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
121 // spaces, so it is custom lowered to handle those where it isn't.
Tom Stellard1e803092013-07-23 01:48:18 +0000122 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
123 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
124 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
125 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000126 setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
127 setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
128
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000129 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000130 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000131 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000132 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
Tom Stellardd3ee8c12013-08-16 01:12:06 +0000133 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
134 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000135
Tom Stellard365366f2013-01-23 02:09:06 +0000136 setOperationAction(ISD::LOAD, MVT::i32, Custom);
137 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000138 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
139
Tom Stellard880a80a2014-06-17 16:53:14 +0000140 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
141 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
142 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
143 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
144
145 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
146 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
147 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
148 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
149
Tom Stellard75aadc22012-12-11 21:25:42 +0000150 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +0000151 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +0000152 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +0000153 setTargetDAGCombine(ISD::SELECT_CC);
Quentin Colombete2e05482013-07-30 00:27:16 +0000154 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000155
Matt Arsenaultb8b51532014-06-23 18:00:38 +0000156 setOperationAction(ISD::SUB, MVT::i64, Expand);
157
Tom Stellard5f337882014-04-29 23:12:43 +0000158 // These should be replaced by UDVIREM, but it does not happen automatically
159 // during Type Legalization
160 setOperationAction(ISD::UDIV, MVT::i64, Custom);
161 setOperationAction(ISD::UREM, MVT::i64, Custom);
Jan Vesely343cd6f02014-06-22 21:43:01 +0000162 setOperationAction(ISD::SDIV, MVT::i64, Custom);
163 setOperationAction(ISD::SREM, MVT::i64, Custom);
Tom Stellard5f337882014-04-29 23:12:43 +0000164
Jan Vesely25f36272014-06-18 12:27:13 +0000165 // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
166 // to be Legal/Custom in order to avoid library calls.
167 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
Jan Vesely900ff2e2014-06-18 12:27:15 +0000168 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
Jan Veselyecf51332014-06-18 12:27:17 +0000169 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
Jan Vesely25f36272014-06-18 12:27:13 +0000170
Michel Danzer49812b52013-07-10 16:37:07 +0000171 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
172
Matt Arsenaultc4d3d3a2014-06-23 18:00:49 +0000173 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
174 for (MVT VT : ScalarIntVTs) {
175 setOperationAction(ISD::ADDC, VT, Expand);
176 setOperationAction(ISD::SUBC, VT, Expand);
177 setOperationAction(ISD::ADDE, VT, Expand);
178 setOperationAction(ISD::SUBE, VT, Expand);
179 }
180
Tom Stellardb852af52013-03-08 15:37:03 +0000181 setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard87047f62013-04-24 23:56:18 +0000182 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
Tom Stellardfc455472013-08-12 22:33:21 +0000183 setSchedulingPreference(Sched::Source);
Tom Stellard75aadc22012-12-11 21:25:42 +0000184}
185
186MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
187 MachineInstr * MI, MachineBasicBlock * BB) const {
188 MachineFunction * MF = BB->getParent();
189 MachineRegisterInfo &MRI = MF->getRegInfo();
190 MachineBasicBlock::iterator I = *MI;
Bill Wendling37e9adb2013-06-07 20:28:55 +0000191 const R600InstrInfo *TII =
192 static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());
Tom Stellard75aadc22012-12-11 21:25:42 +0000193
194 switch (MI->getOpcode()) {
Tom Stellardc6f4a292013-08-26 15:05:59 +0000195 default:
Tom Stellard8f9fc202013-11-15 00:12:45 +0000196 // Replace LDS_*_RET instruction that don't have any uses with the
197 // equivalent LDS_*_NORET instruction.
198 if (TII->isLDSRetInstr(MI->getOpcode())) {
Tom Stellard13c68ef2013-09-05 18:38:09 +0000199 int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
200 assert(DstIdx != -1);
201 MachineInstrBuilder NewMI;
Tom Stellard8f9fc202013-11-15 00:12:45 +0000202 if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()))
203 return BB;
204
205 NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
206 TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
Tom Stellardc6f4a292013-08-26 15:05:59 +0000207 for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
208 NewMI.addOperand(MI->getOperand(i));
209 }
Tom Stellardc6f4a292013-08-26 15:05:59 +0000210 } else {
211 return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
212 }
213 break;
Tom Stellard75aadc22012-12-11 21:25:42 +0000214 case AMDGPU::CLAMP_R600: {
215 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
216 AMDGPU::MOV,
217 MI->getOperand(0).getReg(),
218 MI->getOperand(1).getReg());
219 TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
220 break;
221 }
222
223 case AMDGPU::FABS_R600: {
224 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
225 AMDGPU::MOV,
226 MI->getOperand(0).getReg(),
227 MI->getOperand(1).getReg());
228 TII->addFlag(NewMI, 0, MO_FLAG_ABS);
229 break;
230 }
231
232 case AMDGPU::FNEG_R600: {
233 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
234 AMDGPU::MOV,
235 MI->getOperand(0).getReg(),
236 MI->getOperand(1).getReg());
237 TII->addFlag(NewMI, 0, MO_FLAG_NEG);
238 break;
239 }
240
Tom Stellard75aadc22012-12-11 21:25:42 +0000241 case AMDGPU::MASK_WRITE: {
242 unsigned maskedRegister = MI->getOperand(0).getReg();
243 assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
244 MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
245 TII->addFlag(defInstr, 0, MO_FLAG_MASK);
246 break;
247 }
248
249 case AMDGPU::MOV_IMM_F32:
250 TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
251 MI->getOperand(1).getFPImm()->getValueAPF()
252 .bitcastToAPInt().getZExtValue());
253 break;
254 case AMDGPU::MOV_IMM_I32:
255 TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
256 MI->getOperand(1).getImm());
257 break;
Vincent Lejeune0b72f102013-03-05 15:04:55 +0000258 case AMDGPU::CONST_COPY: {
259 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
260 MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
Tom Stellard02661d92013-06-25 21:22:18 +0000261 TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
Vincent Lejeune0b72f102013-03-05 15:04:55 +0000262 MI->getOperand(1).getImm());
263 break;
264 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000265
266 case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
Tom Stellard0344cdf2013-08-01 15:23:42 +0000267 case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
Tom Stellard75aadc22012-12-11 21:25:42 +0000268 case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +0000269 unsigned EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
Tom Stellard75aadc22012-12-11 21:25:42 +0000270
271 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
272 .addOperand(MI->getOperand(0))
273 .addOperand(MI->getOperand(1))
274 .addImm(EOP); // Set End of program bit
275 break;
276 }
277
Tom Stellard75aadc22012-12-11 21:25:42 +0000278 case AMDGPU::TXD: {
279 unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
280 unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000281 MachineOperand &RID = MI->getOperand(4);
282 MachineOperand &SID = MI->getOperand(5);
283 unsigned TextureId = MI->getOperand(6).getImm();
284 unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
285 unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
Tom Stellard75aadc22012-12-11 21:25:42 +0000286
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000287 switch (TextureId) {
288 case 5: // Rect
289 CTX = CTY = 0;
290 break;
291 case 6: // Shadow1D
292 SrcW = SrcZ;
293 break;
294 case 7: // Shadow2D
295 SrcW = SrcZ;
296 break;
297 case 8: // ShadowRect
298 CTX = CTY = 0;
299 SrcW = SrcZ;
300 break;
301 case 9: // 1DArray
302 SrcZ = SrcY;
303 CTZ = 0;
304 break;
305 case 10: // 2DArray
306 CTZ = 0;
307 break;
308 case 11: // Shadow1DArray
309 SrcZ = SrcY;
310 CTZ = 0;
311 break;
312 case 12: // Shadow2DArray
313 CTZ = 0;
314 break;
315 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000316 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
317 .addOperand(MI->getOperand(3))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000318 .addImm(SrcX)
319 .addImm(SrcY)
320 .addImm(SrcZ)
321 .addImm(SrcW)
322 .addImm(0)
323 .addImm(0)
324 .addImm(0)
325 .addImm(0)
326 .addImm(1)
327 .addImm(2)
328 .addImm(3)
329 .addOperand(RID)
330 .addOperand(SID)
331 .addImm(CTX)
332 .addImm(CTY)
333 .addImm(CTZ)
334 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000335 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
336 .addOperand(MI->getOperand(2))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000337 .addImm(SrcX)
338 .addImm(SrcY)
339 .addImm(SrcZ)
340 .addImm(SrcW)
341 .addImm(0)
342 .addImm(0)
343 .addImm(0)
344 .addImm(0)
345 .addImm(1)
346 .addImm(2)
347 .addImm(3)
348 .addOperand(RID)
349 .addOperand(SID)
350 .addImm(CTX)
351 .addImm(CTY)
352 .addImm(CTZ)
353 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000354 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
355 .addOperand(MI->getOperand(0))
356 .addOperand(MI->getOperand(1))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000357 .addImm(SrcX)
358 .addImm(SrcY)
359 .addImm(SrcZ)
360 .addImm(SrcW)
361 .addImm(0)
362 .addImm(0)
363 .addImm(0)
364 .addImm(0)
365 .addImm(1)
366 .addImm(2)
367 .addImm(3)
368 .addOperand(RID)
369 .addOperand(SID)
370 .addImm(CTX)
371 .addImm(CTY)
372 .addImm(CTZ)
373 .addImm(CTW)
Tom Stellard75aadc22012-12-11 21:25:42 +0000374 .addReg(T0, RegState::Implicit)
375 .addReg(T1, RegState::Implicit);
376 break;
377 }
378
379 case AMDGPU::TXD_SHADOW: {
380 unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
381 unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000382 MachineOperand &RID = MI->getOperand(4);
383 MachineOperand &SID = MI->getOperand(5);
384 unsigned TextureId = MI->getOperand(6).getImm();
385 unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
386 unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
387
388 switch (TextureId) {
389 case 5: // Rect
390 CTX = CTY = 0;
391 break;
392 case 6: // Shadow1D
393 SrcW = SrcZ;
394 break;
395 case 7: // Shadow2D
396 SrcW = SrcZ;
397 break;
398 case 8: // ShadowRect
399 CTX = CTY = 0;
400 SrcW = SrcZ;
401 break;
402 case 9: // 1DArray
403 SrcZ = SrcY;
404 CTZ = 0;
405 break;
406 case 10: // 2DArray
407 CTZ = 0;
408 break;
409 case 11: // Shadow1DArray
410 SrcZ = SrcY;
411 CTZ = 0;
412 break;
413 case 12: // Shadow2DArray
414 CTZ = 0;
415 break;
416 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000417
418 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
419 .addOperand(MI->getOperand(3))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000420 .addImm(SrcX)
421 .addImm(SrcY)
422 .addImm(SrcZ)
423 .addImm(SrcW)
424 .addImm(0)
425 .addImm(0)
426 .addImm(0)
427 .addImm(0)
428 .addImm(1)
429 .addImm(2)
430 .addImm(3)
431 .addOperand(RID)
432 .addOperand(SID)
433 .addImm(CTX)
434 .addImm(CTY)
435 .addImm(CTZ)
436 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000437 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
438 .addOperand(MI->getOperand(2))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000439 .addImm(SrcX)
440 .addImm(SrcY)
441 .addImm(SrcZ)
442 .addImm(SrcW)
443 .addImm(0)
444 .addImm(0)
445 .addImm(0)
446 .addImm(0)
447 .addImm(1)
448 .addImm(2)
449 .addImm(3)
450 .addOperand(RID)
451 .addOperand(SID)
452 .addImm(CTX)
453 .addImm(CTY)
454 .addImm(CTZ)
455 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000456 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
457 .addOperand(MI->getOperand(0))
458 .addOperand(MI->getOperand(1))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000459 .addImm(SrcX)
460 .addImm(SrcY)
461 .addImm(SrcZ)
462 .addImm(SrcW)
463 .addImm(0)
464 .addImm(0)
465 .addImm(0)
466 .addImm(0)
467 .addImm(1)
468 .addImm(2)
469 .addImm(3)
470 .addOperand(RID)
471 .addOperand(SID)
472 .addImm(CTX)
473 .addImm(CTY)
474 .addImm(CTZ)
475 .addImm(CTW)
Tom Stellard75aadc22012-12-11 21:25:42 +0000476 .addReg(T0, RegState::Implicit)
477 .addReg(T1, RegState::Implicit);
478 break;
479 }
480
481 case AMDGPU::BRANCH:
482 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000483 .addOperand(MI->getOperand(0));
Tom Stellard75aadc22012-12-11 21:25:42 +0000484 break;
485
486 case AMDGPU::BRANCH_COND_f32: {
487 MachineInstr *NewMI =
488 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
489 AMDGPU::PREDICATE_BIT)
490 .addOperand(MI->getOperand(1))
491 .addImm(OPCODE_IS_NOT_ZERO)
492 .addImm(0); // Flags
493 TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000494 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
Tom Stellard75aadc22012-12-11 21:25:42 +0000495 .addOperand(MI->getOperand(0))
496 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
497 break;
498 }
499
500 case AMDGPU::BRANCH_COND_i32: {
501 MachineInstr *NewMI =
502 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
503 AMDGPU::PREDICATE_BIT)
504 .addOperand(MI->getOperand(1))
505 .addImm(OPCODE_IS_NOT_ZERO_INT)
506 .addImm(0); // Flags
507 TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000508 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
Tom Stellard75aadc22012-12-11 21:25:42 +0000509 .addOperand(MI->getOperand(0))
510 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
511 break;
512 }
513
Tom Stellard75aadc22012-12-11 21:25:42 +0000514 case AMDGPU::EG_ExportSwz:
515 case AMDGPU::R600_ExportSwz: {
Tom Stellard6f1b8652013-01-23 21:39:49 +0000516 // Instruction is left unmodified if its not the last one of its type
517 bool isLastInstructionOfItsType = true;
518 unsigned InstExportType = MI->getOperand(1).getImm();
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +0000519 for (MachineBasicBlock::iterator NextExportInst = std::next(I),
Tom Stellard6f1b8652013-01-23 21:39:49 +0000520 EndBlock = BB->end(); NextExportInst != EndBlock;
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +0000521 NextExportInst = std::next(NextExportInst)) {
Tom Stellard6f1b8652013-01-23 21:39:49 +0000522 if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
523 NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
524 unsigned CurrentInstExportType = NextExportInst->getOperand(1)
525 .getImm();
526 if (CurrentInstExportType == InstExportType) {
527 isLastInstructionOfItsType = false;
528 break;
529 }
530 }
531 }
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +0000532 bool EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
Tom Stellard6f1b8652013-01-23 21:39:49 +0000533 if (!EOP && !isLastInstructionOfItsType)
Tom Stellard75aadc22012-12-11 21:25:42 +0000534 return BB;
535 unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
536 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
537 .addOperand(MI->getOperand(0))
538 .addOperand(MI->getOperand(1))
539 .addOperand(MI->getOperand(2))
540 .addOperand(MI->getOperand(3))
541 .addOperand(MI->getOperand(4))
542 .addOperand(MI->getOperand(5))
543 .addOperand(MI->getOperand(6))
544 .addImm(CfInst)
Tom Stellard6f1b8652013-01-23 21:39:49 +0000545 .addImm(EOP);
Tom Stellard75aadc22012-12-11 21:25:42 +0000546 break;
547 }
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000548 case AMDGPU::RETURN: {
549 // RETURN instructions must have the live-out registers as implicit uses,
550 // otherwise they appear dead.
551 R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
552 MachineInstrBuilder MIB(*MF, MI);
553 for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
554 MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
555 return BB;
556 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000557 }
558
559 MI->eraseFromParent();
560 return BB;
561}
562
//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//
566
Tom Stellard75aadc22012-12-11 21:25:42 +0000567SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000568 MachineFunction &MF = DAG.getMachineFunction();
569 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard75aadc22012-12-11 21:25:42 +0000570 switch (Op.getOpcode()) {
571 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Tom Stellard880a80a2014-06-17 16:53:14 +0000572 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
573 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
Jan Vesely25f36272014-06-18 12:27:13 +0000574 case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
Jan Veselyecf51332014-06-18 12:27:17 +0000575 case ISD::SRA_PARTS:
Jan Vesely900ff2e2014-06-18 12:27:15 +0000576 case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000577 case ISD::FCOS:
578 case ISD::FSIN: return LowerTrig(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000579 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000580 case ISD::STORE: return LowerSTORE(Op, DAG);
Matt Arsenaultd2c9e082014-07-07 18:34:45 +0000581 case ISD::LOAD: {
582 SDValue Result = LowerLOAD(Op, DAG);
583 assert((!Result.getNode() ||
584 Result.getNode()->getNumValues() == 2) &&
585 "Load should return a value and a chain");
586 return Result;
587 }
588
Matt Arsenault1d555c42014-06-23 18:00:55 +0000589 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000590 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000591 case ISD::INTRINSIC_VOID: {
592 SDValue Chain = Op.getOperand(0);
593 unsigned IntrinsicID =
594 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
595 switch (IntrinsicID) {
596 case AMDGPUIntrinsic::AMDGPU_store_output: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000597 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
598 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000599 MFI->LiveOuts.push_back(Reg);
Andrew Trickef9de2a2013-05-25 02:42:55 +0000600 return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
Tom Stellard75aadc22012-12-11 21:25:42 +0000601 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000602 case AMDGPUIntrinsic::R600_store_swizzle: {
603 const SDValue Args[8] = {
604 Chain,
605 Op.getOperand(2), // Export Value
606 Op.getOperand(3), // ArrayBase
607 Op.getOperand(4), // Type
608 DAG.getConstant(0, MVT::i32), // SWZ_X
609 DAG.getConstant(1, MVT::i32), // SWZ_Y
610 DAG.getConstant(2, MVT::i32), // SWZ_Z
611 DAG.getConstant(3, MVT::i32) // SWZ_W
612 };
Craig Topper48d114b2014-04-26 18:35:24 +0000613 return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(), Args);
Tom Stellard75aadc22012-12-11 21:25:42 +0000614 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000615
Tom Stellard75aadc22012-12-11 21:25:42 +0000616 // default for switch(IntrinsicID)
617 default: break;
618 }
619 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
620 break;
621 }
622 case ISD::INTRINSIC_WO_CHAIN: {
623 unsigned IntrinsicID =
624 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
625 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000626 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000627 switch(IntrinsicID) {
628 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000629 case AMDGPUIntrinsic::R600_load_input: {
630 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
631 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
632 MachineFunction &MF = DAG.getMachineFunction();
633 MachineRegisterInfo &MRI = MF.getRegInfo();
634 MRI.addLiveIn(Reg);
635 return DAG.getCopyFromReg(DAG.getEntryNode(),
636 SDLoc(DAG.getEntryNode()), Reg, VT);
637 }
638
639 case AMDGPUIntrinsic::R600_interp_input: {
640 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
641 int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
642 MachineSDNode *interp;
643 if (ijb < 0) {
644 const MachineFunction &MF = DAG.getMachineFunction();
645 const R600InstrInfo *TII =
646 static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
647 interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
648 MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
649 return DAG.getTargetExtractSubreg(
650 TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
651 DL, MVT::f32, SDValue(interp, 0));
652 }
653 MachineFunction &MF = DAG.getMachineFunction();
654 MachineRegisterInfo &MRI = MF.getRegInfo();
655 unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
656 unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
657 MRI.addLiveIn(RegisterI);
658 MRI.addLiveIn(RegisterJ);
659 SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
660 SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
661 SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
662 SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);
663
664 if (slot % 4 < 2)
665 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
666 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
667 RegisterJNode, RegisterINode);
668 else
669 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
670 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
671 RegisterJNode, RegisterINode);
672 return SDValue(interp, slot % 2);
673 }
Vincent Lejeunef143af32013-11-11 22:10:24 +0000674 case AMDGPUIntrinsic::R600_interp_xy:
675 case AMDGPUIntrinsic::R600_interp_zw: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000676 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Tom Stellard41afe6a2013-02-05 17:09:14 +0000677 MachineSDNode *interp;
Vincent Lejeunef143af32013-11-11 22:10:24 +0000678 SDValue RegisterINode = Op.getOperand(2);
679 SDValue RegisterJNode = Op.getOperand(3);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000680
Vincent Lejeunef143af32013-11-11 22:10:24 +0000681 if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
Tom Stellard41afe6a2013-02-05 17:09:14 +0000682 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
Vincent Lejeunef143af32013-11-11 22:10:24 +0000683 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000684 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000685 else
686 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
Vincent Lejeunef143af32013-11-11 22:10:24 +0000687 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000688 RegisterJNode, RegisterINode);
Vincent Lejeunef143af32013-11-11 22:10:24 +0000689 return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
690 SDValue(interp, 0), SDValue(interp, 1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000691 }
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000692 case AMDGPUIntrinsic::R600_tex:
693 case AMDGPUIntrinsic::R600_texc:
694 case AMDGPUIntrinsic::R600_txl:
695 case AMDGPUIntrinsic::R600_txlc:
696 case AMDGPUIntrinsic::R600_txb:
697 case AMDGPUIntrinsic::R600_txbc:
698 case AMDGPUIntrinsic::R600_txf:
699 case AMDGPUIntrinsic::R600_txq:
700 case AMDGPUIntrinsic::R600_ddx:
Vincent Lejeune6df39432013-10-02 16:00:33 +0000701 case AMDGPUIntrinsic::R600_ddy:
702 case AMDGPUIntrinsic::R600_ldptr: {
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000703 unsigned TextureOp;
704 switch (IntrinsicID) {
705 case AMDGPUIntrinsic::R600_tex:
706 TextureOp = 0;
707 break;
708 case AMDGPUIntrinsic::R600_texc:
709 TextureOp = 1;
710 break;
711 case AMDGPUIntrinsic::R600_txl:
712 TextureOp = 2;
713 break;
714 case AMDGPUIntrinsic::R600_txlc:
715 TextureOp = 3;
716 break;
717 case AMDGPUIntrinsic::R600_txb:
718 TextureOp = 4;
719 break;
720 case AMDGPUIntrinsic::R600_txbc:
721 TextureOp = 5;
722 break;
723 case AMDGPUIntrinsic::R600_txf:
724 TextureOp = 6;
725 break;
726 case AMDGPUIntrinsic::R600_txq:
727 TextureOp = 7;
728 break;
729 case AMDGPUIntrinsic::R600_ddx:
730 TextureOp = 8;
731 break;
732 case AMDGPUIntrinsic::R600_ddy:
733 TextureOp = 9;
734 break;
Vincent Lejeune6df39432013-10-02 16:00:33 +0000735 case AMDGPUIntrinsic::R600_ldptr:
736 TextureOp = 10;
737 break;
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000738 default:
739 llvm_unreachable("Unknow Texture Operation");
740 }
741
742 SDValue TexArgs[19] = {
743 DAG.getConstant(TextureOp, MVT::i32),
744 Op.getOperand(1),
745 DAG.getConstant(0, MVT::i32),
746 DAG.getConstant(1, MVT::i32),
747 DAG.getConstant(2, MVT::i32),
748 DAG.getConstant(3, MVT::i32),
749 Op.getOperand(2),
750 Op.getOperand(3),
751 Op.getOperand(4),
752 DAG.getConstant(0, MVT::i32),
753 DAG.getConstant(1, MVT::i32),
754 DAG.getConstant(2, MVT::i32),
755 DAG.getConstant(3, MVT::i32),
756 Op.getOperand(5),
757 Op.getOperand(6),
758 Op.getOperand(7),
759 Op.getOperand(8),
760 Op.getOperand(9),
761 Op.getOperand(10)
762 };
Craig Topper48d114b2014-04-26 18:35:24 +0000763 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000764 }
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000765 case AMDGPUIntrinsic::AMDGPU_dp4: {
766 SDValue Args[8] = {
767 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
768 DAG.getConstant(0, MVT::i32)),
769 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
770 DAG.getConstant(0, MVT::i32)),
771 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
772 DAG.getConstant(1, MVT::i32)),
773 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
774 DAG.getConstant(1, MVT::i32)),
775 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
776 DAG.getConstant(2, MVT::i32)),
777 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
778 DAG.getConstant(2, MVT::i32)),
779 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
780 DAG.getConstant(3, MVT::i32)),
781 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
782 DAG.getConstant(3, MVT::i32))
783 };
Craig Topper48d114b2014-04-26 18:35:24 +0000784 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000785 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000786
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000787 case Intrinsic::r600_read_ngroups_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000788 return LowerImplicitParameter(DAG, VT, DL, 0);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000789 case Intrinsic::r600_read_ngroups_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000790 return LowerImplicitParameter(DAG, VT, DL, 1);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000791 case Intrinsic::r600_read_ngroups_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000792 return LowerImplicitParameter(DAG, VT, DL, 2);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000793 case Intrinsic::r600_read_global_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000794 return LowerImplicitParameter(DAG, VT, DL, 3);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000795 case Intrinsic::r600_read_global_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000796 return LowerImplicitParameter(DAG, VT, DL, 4);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000797 case Intrinsic::r600_read_global_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000798 return LowerImplicitParameter(DAG, VT, DL, 5);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000799 case Intrinsic::r600_read_local_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000800 return LowerImplicitParameter(DAG, VT, DL, 6);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000801 case Intrinsic::r600_read_local_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000802 return LowerImplicitParameter(DAG, VT, DL, 7);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000803 case Intrinsic::r600_read_local_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000804 return LowerImplicitParameter(DAG, VT, DL, 8);
805
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000806 case Intrinsic::r600_read_tgid_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000807 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
808 AMDGPU::T1_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000809 case Intrinsic::r600_read_tgid_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000810 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
811 AMDGPU::T1_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000812 case Intrinsic::r600_read_tgid_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000813 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
814 AMDGPU::T1_Z, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000815 case Intrinsic::r600_read_tidig_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000816 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
817 AMDGPU::T0_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000818 case Intrinsic::r600_read_tidig_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000819 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
820 AMDGPU::T0_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000821 case Intrinsic::r600_read_tidig_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000822 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
823 AMDGPU::T0_Z, VT);
Matt Arsenault257d48d2014-06-24 22:13:39 +0000824 case Intrinsic::AMDGPU_rsq:
825 // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
826 return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000827 }
828 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
829 break;
830 }
831 } // end switch(Op.getOpcode())
832 return SDValue();
833}
834
835void R600TargetLowering::ReplaceNodeResults(SDNode *N,
836 SmallVectorImpl<SDValue> &Results,
837 SelectionDAG &DAG) const {
838 switch (N->getOpcode()) {
Matt Arsenaultd125d742014-03-27 17:23:24 +0000839 default:
840 AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
841 return;
Tom Stellard75aadc22012-12-11 21:25:42 +0000842 case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
Tom Stellard365366f2013-01-23 02:09:06 +0000843 return;
Jan Vesely343cd6f02014-06-22 21:43:01 +0000844 case ISD::UDIV: {
845 SDValue Op = SDValue(N, 0);
846 SDLoc DL(Op);
847 EVT VT = Op.getValueType();
848 SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT),
849 N->getOperand(0), N->getOperand(1));
850 Results.push_back(UDIVREM);
851 break;
852 }
853 case ISD::UREM: {
854 SDValue Op = SDValue(N, 0);
855 SDLoc DL(Op);
856 EVT VT = Op.getValueType();
857 SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT),
858 N->getOperand(0), N->getOperand(1));
859 Results.push_back(UDIVREM.getValue(1));
860 break;
861 }
862 case ISD::SDIV: {
863 SDValue Op = SDValue(N, 0);
864 SDLoc DL(Op);
865 EVT VT = Op.getValueType();
866 SDValue SDIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(VT, VT),
867 N->getOperand(0), N->getOperand(1));
868 Results.push_back(SDIVREM);
869 break;
870 }
871 case ISD::SREM: {
872 SDValue Op = SDValue(N, 0);
873 SDLoc DL(Op);
874 EVT VT = Op.getValueType();
875 SDValue SDIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(VT, VT),
876 N->getOperand(0), N->getOperand(1));
877 Results.push_back(SDIVREM.getValue(1));
878 break;
879 }
880 case ISD::SDIVREM: {
881 SDValue Op = SDValue(N, 1);
882 SDValue RES = LowerSDIVREM(Op, DAG);
883 Results.push_back(RES);
884 Results.push_back(RES.getValue(1));
885 break;
886 }
887 case ISD::UDIVREM: {
888 SDValue Op = SDValue(N, 0);
889 SDLoc DL(Op);
890 EVT VT = Op.getValueType();
891 EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
892
893 SDValue one = DAG.getConstant(1, HalfVT);
894 SDValue zero = DAG.getConstant(0, HalfVT);
895
896 //HiLo split
897 SDValue LHS = N->getOperand(0);
898 SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, zero);
899 SDValue LHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, one);
900
901 SDValue RHS = N->getOperand(1);
902 SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, zero);
903 SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, one);
904
905 // Get Speculative values
906 SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo);
907 SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo);
908
909 SDValue REM_Hi = zero;
910 SDValue REM_Lo = DAG.getSelectCC(DL, RHS_Hi, zero, REM_Part, LHS_Hi, ISD::SETEQ);
911
912 SDValue DIV_Hi = DAG.getSelectCC(DL, RHS_Hi, zero, DIV_Part, zero, ISD::SETEQ);
913 SDValue DIV_Lo = zero;
914
915 const unsigned halfBitWidth = HalfVT.getSizeInBits();
916
917 for (unsigned i = 0; i < halfBitWidth; ++i) {
918 SDValue POS = DAG.getConstant(halfBitWidth - i - 1, HalfVT);
919 // Get Value of high bit
920 SDValue HBit;
921 if (halfBitWidth == 32 && Subtarget->hasBFE()) {
922 HBit = DAG.getNode(AMDGPUISD::BFE_U32, DL, HalfVT, LHS_Lo, POS, one);
923 } else {
924 HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS);
925 HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one);
926 }
927
928 SDValue Carry = DAG.getNode(ISD::SRL, DL, HalfVT, REM_Lo,
929 DAG.getConstant(halfBitWidth - 1, HalfVT));
930 REM_Hi = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Hi, one);
931 REM_Hi = DAG.getNode(ISD::OR, DL, HalfVT, REM_Hi, Carry);
932
933 REM_Lo = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Lo, one);
934 REM_Lo = DAG.getNode(ISD::OR, DL, HalfVT, REM_Lo, HBit);
935
936
937 SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
938
939 SDValue BIT = DAG.getConstant(1 << (halfBitWidth - i - 1), HalfVT);
940 SDValue realBIT = DAG.getSelectCC(DL, REM, RHS, BIT, zero, ISD::SETGE);
941
942 DIV_Lo = DAG.getNode(ISD::OR, DL, HalfVT, DIV_Lo, realBIT);
943
944 // Update REM
945
946 SDValue REM_sub = DAG.getNode(ISD::SUB, DL, VT, REM, RHS);
947
948 REM = DAG.getSelectCC(DL, REM, RHS, REM_sub, REM, ISD::SETGE);
949 REM_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, zero);
950 REM_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, one);
951 }
952
953 SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
954 SDValue DIV = DAG.getNode(ISD::BUILD_PAIR, DL, VT, DIV_Lo, DIV_Hi);
955 Results.push_back(DIV);
956 Results.push_back(REM);
957 break;
958 }
959 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000960}
961
Tom Stellard880a80a2014-06-17 16:53:14 +0000962SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
963 SDValue Vector) const {
964
965 SDLoc DL(Vector);
966 EVT VecVT = Vector.getValueType();
967 EVT EltVT = VecVT.getVectorElementType();
968 SmallVector<SDValue, 8> Args;
969
970 for (unsigned i = 0, e = VecVT.getVectorNumElements();
971 i != e; ++i) {
972 Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
973 Vector, DAG.getConstant(i, getVectorIdxTy())));
974 }
975
976 return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
977}
978
979SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
980 SelectionDAG &DAG) const {
981
982 SDLoc DL(Op);
983 SDValue Vector = Op.getOperand(0);
984 SDValue Index = Op.getOperand(1);
985
986 if (isa<ConstantSDNode>(Index) ||
987 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
988 return Op;
989
990 Vector = vectorToVerticalVector(DAG, Vector);
991 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
992 Vector, Index);
993}
994
995SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
996 SelectionDAG &DAG) const {
997 SDLoc DL(Op);
998 SDValue Vector = Op.getOperand(0);
999 SDValue Value = Op.getOperand(1);
1000 SDValue Index = Op.getOperand(2);
1001
1002 if (isa<ConstantSDNode>(Index) ||
1003 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
1004 return Op;
1005
1006 Vector = vectorToVerticalVector(DAG, Vector);
1007 SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
1008 Vector, Value, Index);
1009 return vectorToVerticalVector(DAG, Insert);
1010}
1011
Vincent Lejeuneb55940c2013-07-09 15:03:11 +00001012SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
1013 // On hw >= R700, COS/SIN input must be between -1. and 1.
1014 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
1015 EVT VT = Op.getValueType();
1016 SDValue Arg = Op.getOperand(0);
1017 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
1018 DAG.getNode(ISD::FADD, SDLoc(Op), VT,
1019 DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
1020 DAG.getConstantFP(0.15915494309, MVT::f32)),
1021 DAG.getConstantFP(0.5, MVT::f32)));
1022 unsigned TrigNode;
1023 switch (Op.getOpcode()) {
1024 case ISD::FCOS:
1025 TrigNode = AMDGPUISD::COS_HW;
1026 break;
1027 case ISD::FSIN:
1028 TrigNode = AMDGPUISD::SIN_HW;
1029 break;
1030 default:
1031 llvm_unreachable("Wrong trig opcode");
1032 }
1033 SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
1034 DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
1035 DAG.getConstantFP(-0.5, MVT::f32)));
1036 if (Gen >= AMDGPUSubtarget::R700)
1037 return TrigVal;
1038 // On R600 hw, COS/SIN input must be between -Pi and Pi.
1039 return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
1040 DAG.getConstantFP(3.14159265359, MVT::f32));
1041}
1042
Jan Vesely25f36272014-06-18 12:27:13 +00001043SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
1044 SDLoc DL(Op);
1045 EVT VT = Op.getValueType();
1046
1047 SDValue Lo = Op.getOperand(0);
1048 SDValue Hi = Op.getOperand(1);
1049 SDValue Shift = Op.getOperand(2);
1050 SDValue Zero = DAG.getConstant(0, VT);
1051 SDValue One = DAG.getConstant(1, VT);
1052
1053 SDValue Width = DAG.getConstant(VT.getSizeInBits(), VT);
1054 SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT);
1055 SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
1056 SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
1057
1058 // The dance around Width1 is necessary for 0 special case.
1059 // Without it the CompShift might be 32, producing incorrect results in
1060 // Overflow. So we do the shift in two steps, the alternative is to
1061 // add a conditional to filter the special case.
1062
1063 SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
1064 Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);
1065
1066 SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
1067 HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
1068 SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);
1069
1070 SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
1071 SDValue LoBig = Zero;
1072
1073 Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
1074 Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
1075
1076 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
1077}
1078
Jan Vesely900ff2e2014-06-18 12:27:15 +00001079SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
1080 SDLoc DL(Op);
1081 EVT VT = Op.getValueType();
1082
1083 SDValue Lo = Op.getOperand(0);
1084 SDValue Hi = Op.getOperand(1);
1085 SDValue Shift = Op.getOperand(2);
1086 SDValue Zero = DAG.getConstant(0, VT);
1087 SDValue One = DAG.getConstant(1, VT);
1088
Jan Veselyecf51332014-06-18 12:27:17 +00001089 const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;
1090
Jan Vesely900ff2e2014-06-18 12:27:15 +00001091 SDValue Width = DAG.getConstant(VT.getSizeInBits(), VT);
1092 SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT);
1093 SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
1094 SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
1095
1096 // The dance around Width1 is necessary for 0 special case.
1097 // Without it the CompShift might be 32, producing incorrect results in
1098 // Overflow. So we do the shift in two steps, the alternative is to
1099 // add a conditional to filter the special case.
1100
1101 SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
1102 Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);
1103
Jan Veselyecf51332014-06-18 12:27:17 +00001104 SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
Jan Vesely900ff2e2014-06-18 12:27:15 +00001105 SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
1106 LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);
1107
Jan Veselyecf51332014-06-18 12:27:17 +00001108 SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
1109 SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;
Jan Vesely900ff2e2014-06-18 12:27:15 +00001110
1111 Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
1112 Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
1113
1114 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
1115}
1116
Tom Stellard75aadc22012-12-11 21:25:42 +00001117SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
1118 return DAG.getNode(
1119 ISD::SETCC,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001120 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +00001121 MVT::i1,
1122 Op, DAG.getConstantFP(0.0f, MVT::f32),
1123 DAG.getCondCode(ISD::SETNE)
1124 );
1125}
1126
Tom Stellard75aadc22012-12-11 21:25:42 +00001127SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001128 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001129 unsigned DwordOffset) const {
1130 unsigned ByteOffset = DwordOffset * 4;
1131 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +00001132 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +00001133
1134 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
1135 assert(isInt<16>(ByteOffset));
1136
1137 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
1138 DAG.getConstant(ByteOffset, MVT::i32), // PTR
1139 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
1140 false, false, false, 0);
1141}
1142
Tom Stellard75aadc22012-12-11 21:25:42 +00001143bool R600TargetLowering::isZero(SDValue Op) const {
1144 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
1145 return Cst->isNullValue();
1146 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
1147 return CstFP->isZero();
1148 } else {
1149 return false;
1150 }
1151}
1152
/// Lower SELECT_CC, trying in order: a SET* instruction, a CND*
/// instruction, and finally a two-step expansion of SELECT_CCs the
/// hardware can match.  The condition code / operand swaps below are
/// order-sensitive: each stage rewrites LHS/RHS/True/False/CC in place for
/// the stages after it.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
     ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  // If True/False are the hardware values but in the wrong slots, invert
  // the condition (or invert-and-swap the compare operands) so the SET*
  // pattern below can match.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    // Re-read the condition code: CC may have been rewritten by the SET*
    // stage above, so the outer CCOpcode is stale here.
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the conditon and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        // Inverting the condition also swaps which value is selected.
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types.  This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    // The CND* family has no "not equal" form, so fold the negation into
    // a swap of the selected values.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this for it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
1287
Alp Tokercb402912014-01-24 17:20:08 +00001288/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001289/// convert these pointers to a register index. Each register holds
1290/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
1291/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
1292/// for indirect addressing.
1293SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1294 unsigned StackWidth,
1295 SelectionDAG &DAG) const {
1296 unsigned SRLPad;
1297 switch(StackWidth) {
1298 case 1:
1299 SRLPad = 2;
1300 break;
1301 case 2:
1302 SRLPad = 3;
1303 break;
1304 case 4:
1305 SRLPad = 4;
1306 break;
1307 default: llvm_unreachable("Invalid stack width");
1308 }
1309
Andrew Trickef9de2a2013-05-25 02:42:55 +00001310 return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001311 DAG.getConstant(SRLPad, MVT::i32));
1312}
1313
/// Compute the register channel and pointer increment used to address
/// element \p ElemIdx of a stack value that occupies \p StackWidth
/// channels per register.
///
/// \p PtrIncr is an increment relative to the previous element's address:
/// the caller (see LowerSTORE's vector path) visits elements in increasing
/// order and accumulates PtrIncr into the pointer, so it is not an
/// absolute register offset.
void R600TargetLowering::getStackAddress(unsigned StackWidth,
                                         unsigned ElemIdx,
                                         unsigned &Channel,
                                         unsigned &PtrIncr) const {
  switch (StackWidth) {
  default:
  case 1:
    // One channel per register: every element after the first starts a new
    // register, so each step advances the pointer by one.
    Channel = 0;
    if (ElemIdx > 0) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 2:
    // Two channels per register: elements 0,1 share the first register,
    // elements 2,3 the next.  The single +1 happens when crossing from
    // element 1 to element 2; element 3 stays in the same register.
    Channel = ElemIdx % 2;
    if (ElemIdx == 2) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 4:
    // All four channels fit in one register; the pointer never advances.
    Channel = ElemIdx;
    PtrIncr = 0;
    break;
  }
}
1342
/// Custom lowering for stores.  Handles, in order: stores the generic
/// AMDGPU lowering can take care of, global-address-space stores
/// (truncating byte/short stores become a masked-OR intrinsic; dword
/// stores get their pointer converted to a dword address), and finally
/// private-address-space stores, which are lowered to per-channel
/// REGISTER_STORE nodes for indirect stack addressing.
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Ptr = Op.getOperand(2);

  // Let the common AMDGPU lowering handle the store if it can.
  SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
  if (Result.getNode()) {
    return Result;
  }

  if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
    if (StoreNode->isTruncatingStore()) {
      // Sub-dword global store: emit a STORE_MSKOR (masked OR) that merges
      // the shifted value into the containing dword.
      EVT VT = Value.getValueType();
      assert(VT.bitsLE(MVT::i32));
      EVT MemVT = StoreNode->getMemoryVT();
      SDValue MaskConstant;
      if (MemVT == MVT::i8) {
        MaskConstant = DAG.getConstant(0xFF, MVT::i32);
      } else {
        assert(MemVT == MVT::i16);
        MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
      }
      // Split the byte pointer into a dword address and a byte offset
      // within the dword; the byte offset (x8) becomes the bit shift.
      SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
                                      DAG.getConstant(2, MVT::i32));
      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
                                      DAG.getConstant(0x00000003, VT));
      SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
      SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
                                  DAG.getConstant(3, VT));
      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
      // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
      // vector instead.
      SDValue Src[4] = {
        ShiftedValue,
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(0, MVT::i32),
        Mask
      };
      SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src);
      SDValue Args[3] = { Chain, Input, DWordAddr };
      return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
                                     Op->getVTList(), Args, MemVT,
                                     StoreNode->getMemOperand());
    } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
               Value.getValueType().bitsGE(MVT::i32)) {
      // Convert pointer from byte address to dword address.
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
                        DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
                                    Ptr, DAG.getConstant(2, MVT::i32)));

      if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
        llvm_unreachable("Truncated and indexed stores not supported yet");
      } else {
        Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
      }
      return Chain;
    }
  }

  EVT ValueVT = Value.getValueType();

  // Only private-address stores are handled below.
  if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // NOTE(review): this repeats the identical base-class LowerSTORE call
  // from the top of the function with no DAG changes in between, so Ret
  // should always be null here — looks like dead code; confirm and remove.
  SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
  if (Ret.getNode()) {
    return Ret;
  }
  // Lowering for indirect addressing

  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  // Convert the byte pointer into a register index for REGISTER_STORE.
  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (ValueVT.isVector()) {
    unsigned NumElemVT = ValueVT.getVectorNumElements();
    EVT ElemVT = ValueVT.getVectorElementType();
    SmallVector<SDValue, 4> Stores(NumElemVT);

    // NOTE(review): this is the store path, but the assert text says
    // "load" — message only, behavior unaffected.
    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    // Emit one REGISTER_STORE per element; Ptr accumulates the relative
    // PtrIncr returned by getStackAddress as we walk elements in order.
    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
                                 Value, DAG.getConstant(i, MVT::i32));

      Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
                              Chain, Elem, Ptr,
                              DAG.getTargetConstant(Channel, MVT::i32));
    }
    // Join all the element stores into a single chain.
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
  } else {
    if (ValueVT == MVT::i8) {
      Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
    }
    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
                        DAG.getTargetConstant(0, MVT::i32)); // Channel
  }

  return Chain;
}
1455
Tom Stellard365366f2013-01-23 02:09:06 +00001456// return (512 + (kc_bank << 12)
1457static int
1458ConstantAddressBlock(unsigned AddressSpace) {
1459 switch (AddressSpace) {
1460 case AMDGPUAS::CONSTANT_BUFFER_0:
1461 return 512;
1462 case AMDGPUAS::CONSTANT_BUFFER_1:
1463 return 512 + 4096;
1464 case AMDGPUAS::CONSTANT_BUFFER_2:
1465 return 512 + 4096 * 2;
1466 case AMDGPUAS::CONSTANT_BUFFER_3:
1467 return 512 + 4096 * 3;
1468 case AMDGPUAS::CONSTANT_BUFFER_4:
1469 return 512 + 4096 * 4;
1470 case AMDGPUAS::CONSTANT_BUFFER_5:
1471 return 512 + 4096 * 5;
1472 case AMDGPUAS::CONSTANT_BUFFER_6:
1473 return 512 + 4096 * 6;
1474 case AMDGPUAS::CONSTANT_BUFFER_7:
1475 return 512 + 4096 * 7;
1476 case AMDGPUAS::CONSTANT_BUFFER_8:
1477 return 512 + 4096 * 8;
1478 case AMDGPUAS::CONSTANT_BUFFER_9:
1479 return 512 + 4096 * 9;
1480 case AMDGPUAS::CONSTANT_BUFFER_10:
1481 return 512 + 4096 * 10;
1482 case AMDGPUAS::CONSTANT_BUFFER_11:
1483 return 512 + 4096 * 11;
1484 case AMDGPUAS::CONSTANT_BUFFER_12:
1485 return 512 + 4096 * 12;
1486 case AMDGPUAS::CONSTANT_BUFFER_13:
1487 return 512 + 4096 * 13;
1488 case AMDGPUAS::CONSTANT_BUFFER_14:
1489 return 512 + 4096 * 14;
1490 case AMDGPUAS::CONSTANT_BUFFER_15:
1491 return 512 + 4096 * 15;
1492 default:
1493 return -1;
1494 }
1495}
1496
/// \brief Custom lowering for ISD::LOAD on R600.
///
/// Tries, in order:
///  1. The generic AMDGPU lowering (AMDGPUTargetLowering::LowerLOAD).
///  2. Splitting vector loads from local memory.
///  3. Folding loads from the constant-buffer address spaces into
///     CONST_ADDRESS nodes (kcache reads).
///  4. Expanding SEXT loads as EXTLOAD + shl/sra (only SEXT loads from
///     CONSTANT_BUFFER_0 are natively handled for compute shaders).
///  5. Lowering private-address loads to REGISTER_LOAD for indirect
///     addressing; everything else is left to the legalizer (SDValue()).
///
/// Every successful path merges the loaded value with the chain so the
/// caller sees the (value, chain) pair a LOAD node produces.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  // First give the target-independent AMDGPU lowering a chance.
  SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
  if (Ret.getNode()) {
    SDValue Ops[2] = {
      Ret,
      Chain
    };
    return DAG.getMergeValues(Ops, DL);
  }


  // Vector loads from LDS are split into scalar pieces.
  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      SplitVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Constant-buffer loads become CONST_ADDRESS (kcache) reads.  Only
  // non-extending and zero-extending loads can be folded this way.
  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
        isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      // Build only as many lanes as the original type needs.
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT,
                           makeArrayRef(Slots, NumElements));
    } else {
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    // Scalar loads take lane 0 of the v4 kcache read.
    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    // Sign-extend manually: extload then shift left/arithmetic-shift right.
    SDValue ShiftAmount =
        DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                     LoadNode->getPointerInfo(), MemVT,
                                     LoadNode->isVolatile(),
                                     LoadNode->isNonTemporal(),
                                     LoadNode->getAlignment());
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);

    SDValue MergedValues[2] = { Sra, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Anything that is not a private (scratch) load is left to the legalizer.
  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  // Convert the byte-addressed stack pointer into a register index.
  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    // One REGISTER_LOAD per element; Ptr is advanced cumulatively, so the
    // statement order here is load-bearing.
    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, MVT::i32),
                             Op.getOperand(2));
    }
    // Pad unused lanes with undef so the result is always a 4-wide vector.
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2] = {
    LoweredLoad,
    Chain
  };

  return DAG.getMergeValues(Ops, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001645
Matt Arsenault1d555c42014-06-23 18:00:55 +00001646SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1647 SDValue Chain = Op.getOperand(0);
1648 SDValue Cond = Op.getOperand(1);
1649 SDValue Jump = Op.getOperand(2);
1650
1651 return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1652 Chain, Jump, Cond);
1653}
1654
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
///
/// For non-compute shaders each argument lives in a 128-bit register
/// (R600_Reg128) and is simply copied in.  For compute shaders arguments are
/// loaded from CONSTANT_BUFFER_0, starting 36 bytes in (the first 36 bytes
/// hold thread-group / global-size information).
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      SDLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned ShaderType = MF.getInfo<R600MachineFunctionInfo>()->ShaderType;

  // LocalIns mirrors Ins but with the original (pre-legalized) argument
  // types, so MemVT below reflects the in-memory width of each argument.
  SmallVector<ISD::InputArg, 8> LocalIns;

  getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);

  AnalyzeFormalArguments(CCInfo, LocalIns);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    EVT VT = Ins[i].VT;       // Legalized register type.
    EVT MemVT = LocalIns[i].VT; // Original in-memory type.

    // Graphics shaders: arguments arrive in live-in vector registers.
    if (ShaderType != ShaderType::COMPUTE) {
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    // Compute shaders: arguments are read from constant buffer 0.
    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
    // being invalid. Somehow this seems to work with i64 arguments, but breaks
    // for <1 x i64>.

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.

    // FIXME: This should really check the extload type, but the handling of
    // extload vector parameters seems to be broken.
    //ISD::LoadExtType Ext = Ins[i].Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
    ISD::LoadExtType Ext = ISD::SEXTLOAD;
    SDValue Arg = DAG.getExtLoad(Ext, DL, VT, Chain,
                                 DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
                                 MachinePointerInfo(UndefValue::get(PtrTy)),
                                 MemVT, false, false, 4);

    // 4 is the preferred alignment for the CONSTANT memory space.
    InVals.push_back(Arg);
  }
  return Chain;
}
1714
Matt Arsenault758659232013-05-18 00:21:46 +00001715EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001716 if (!VT.isVector())
1717 return MVT::i32;
Tom Stellard75aadc22012-12-11 21:25:42 +00001718 return VT.changeVectorElementTypeToInteger();
1719}
1720
/// \brief First swizzle-canonicalization pass over a 4-lane BUILD_VECTOR.
///
/// Replaces lanes that can be expressed with a hardware select instead of a
/// register read, recording old-lane -> select mappings in \p RemapSwizzle:
///   - undef lane          -> 7 (SEL_MASK_WRITE)
///   - constant 0.0        -> 4 (SEL_0)
///   - constant 1.0        -> 5 (SEL_1)
///   - duplicate of lane j -> j
/// Each replaced lane becomes undef in the rebuilt vector.  \p RemapSwizzle
/// must be empty on entry; returns the rebuilt BUILD_VECTOR.
static SDValue CompactSwizzlableVector(
  SelectionDAG &DAG, SDValue VectorEntry,
  DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      // We mask write here to teach later passes that the ith element of this
      // vector is undef. Thus we can use it to reduce 128 bits reg usage,
      // break false dependencies and additionally make assembly easier to read.
      RemapSwizzle[i] = 7; // SEL_MASK_WRITE
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    // Lanes already turned into undef (masked or constant) are not
    // candidates for duplicate elimination.
    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      continue;
    // If this lane repeats an earlier lane j, read lane j instead.
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec);
}
1763
/// \brief Second swizzle-canonicalization pass over a 4-lane BUILD_VECTOR.
///
/// Tries to move one extract_vector_elt lane to the position of the element
/// it extracts, so that the lane becomes a plain pass-through.  Lanes that
/// already sit at their source index are pinned first (isUnmovable); at most
/// one swap is performed (the loop breaks after the first).  The lane
/// permutation is recorded in \p RemapSwizzle, which must be empty on entry.
static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
                                DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };
  bool isUnmovable[4] = { false, false, false, false };
  // Pass 1: seed the identity mapping and pin lanes that already extract
  // their own index.  NOTE(review): the extract index is fetched with
  // dyn_cast and dereferenced unchecked — this assumes the index operand is
  // always a ConstantSDNode here; confirm against the producers upstream.
  for (unsigned i = 0; i < 4; i++) {
    RemapSwizzle[i] = i;
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
          ->getZExtValue();
      if (i == Idx)
        isUnmovable[Idx] = true;
    }
  }

  // Pass 2: swap the first movable extract lane into its source slot.
  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
          ->getZExtValue();
      if (isUnmovable[Idx])
        continue;
      // Swap i and Idx
      std::swap(NewBldVec[Idx], NewBldVec[i]);
      std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
      break;
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec);
}
1801
1802
1803SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1804SDValue Swz[4], SelectionDAG &DAG) const {
1805 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1806 // Old -> New swizzle values
1807 DenseMap<unsigned, unsigned> SwizzleRemap;
1808
1809 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1810 for (unsigned i = 0; i < 4; i++) {
1811 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1812 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1813 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1814 }
1815
1816 SwizzleRemap.clear();
1817 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1818 for (unsigned i = 0; i < 4; i++) {
1819 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1820 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1821 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1822 }
1823
1824 return BuildVector;
1825}
1826
1827
Tom Stellard75aadc22012-12-11 21:25:42 +00001828//===----------------------------------------------------------------------===//
1829// Custom DAG Optimizations
1830//===----------------------------------------------------------------------===//
1831
/// \brief R600-specific DAG combines; anything unhandled is delegated to
/// AMDGPUTargetLowering::PerformDAGCombine.
SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
  case ISD::FP_ROUND: {
      SDValue Arg = N->getOperand(0);
      if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
        return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
                           Arg.getOperand(0));
      }
      break;
    }

  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
  // (i32 select_cc f32, f32, -1, 0 cc)
  //
  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
  // this to one of the SET*_DX10 instructions.
  case ISD::FP_TO_SINT: {
    SDValue FNeg = N->getOperand(0);
    if (FNeg.getOpcode() != ISD::FNEG) {
      return SDValue();
    }
    SDValue SelectCC = FNeg.getOperand(0);
    if (SelectCC.getOpcode() != ISD::SELECT_CC ||
        SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
        SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
        !isHWTrueValue(SelectCC.getOperand(2)) ||
        !isHWFalseValue(SelectCC.getOperand(3))) {
      return SDValue();
    }

    return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
                           SelectCC.getOperand(0), // LHS
                           SelectCC.getOperand(1), // RHS
                           DAG.getConstant(-1, MVT::i32), // True
                           DAG.getConstant(0, MVT::i32), // False
                           SelectCC.getOperand(4)); // CC

    // NOTE: unreachable — every path above returns.
    break;
  }

  // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
  // => build_vector elt0, ... , NewEltIdx, ... , eltN
  case ISD::INSERT_VECTOR_ELT: {
    SDValue InVec = N->getOperand(0);
    SDValue InVal = N->getOperand(1);
    SDValue EltNo = N->getOperand(2);
    SDLoc dl(N);

    // If the inserted element is an UNDEF, just use the input vector.
    if (InVal.getOpcode() == ISD::UNDEF)
      return InVec;

    EVT VT = InVec.getValueType();

    // If we can't generate a legal BUILD_VECTOR, exit
    if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
      return SDValue();

    // Check that we know which element is being inserted
    if (!isa<ConstantSDNode>(EltNo))
      return SDValue();
    unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

    // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
    // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
    // vector elements.
    SmallVector<SDValue, 8> Ops;
    if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
      Ops.append(InVec.getNode()->op_begin(),
                 InVec.getNode()->op_end());
    } else if (InVec.getOpcode() == ISD::UNDEF) {
      unsigned NElts = VT.getVectorNumElements();
      Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
    } else {
      return SDValue();
    }

    // Insert the element
    if (Elt < Ops.size()) {
      // All the operands of BUILD_VECTOR must have the same type;
      // we enforce that here.
      EVT OpVT = Ops[0].getValueType();
      if (InVal.getValueType() != OpVT)
        InVal = OpVT.bitsGT(InVal.getValueType()) ?
          DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
          DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
      Ops[Elt] = InVal;
    }

    // Return the new vector
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
  }

  // Extract_vec (Build_vector) generated by custom lowering
  // also needs to be customly combined
  case ISD::EXTRACT_VECTOR_ELT: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return Arg->getOperand(Element);
      }
    }
    if (Arg.getOpcode() == ISD::BITCAST &&
        Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
            Arg->getOperand(0).getOperand(Element));
      }
    }
  }
  // NOTE(review): there is no break above — an unmatched EXTRACT_VECTOR_ELT
  // falls through into the SELECT_CC case below.  The SELECT_CC guards
  // (LHS opcode check) make that fall-through return SDValue() for most
  // inputs, but confirm this is intentional rather than a missing break.

  case ISD::SELECT_CC: {
    // Try common optimizations
    SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
    if (Ret.getNode())
      return Ret;

    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
    // selectcc x, y, a, b, inv(cc)
    //
    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
    // selectcc x, y, a, b, cc
    SDValue LHS = N->getOperand(0);
    if (LHS.getOpcode() != ISD::SELECT_CC) {
      return SDValue();
    }

    SDValue RHS = N->getOperand(1);
    SDValue True = N->getOperand(2);
    SDValue False = N->getOperand(3);
    ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();

    // Only fire when the outer select compares against the inner select's
    // own true/false values (the pattern in the comment above).
    if (LHS.getOperand(2).getNode() != True.getNode() ||
        LHS.getOperand(3).getNode() != False.getNode() ||
        RHS.getNode() != False.getNode()) {
      return SDValue();
    }

    switch (NCC) {
    default: return SDValue();
    case ISD::SETNE: return LHS;
    case ISD::SETEQ: {
      ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
      LHSCC = ISD::getSetCCInverse(LHSCC,
                                  LHS.getOperand(0).getValueType().isInteger());
      // Only rewrite if the inverted condition is (or will become) legal.
      if (DCI.isBeforeLegalizeOps() ||
          isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
        return DAG.getSelectCC(SDLoc(N),
                               LHS.getOperand(0),
                               LHS.getOperand(1),
                               LHS.getOperand(2),
                               LHS.getOperand(3),
                               LHSCC);
      break;
    }
    }
    return SDValue();
  }

  case AMDGPUISD::EXPORT: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    // Rebuild the export with canonicalized swizzle selectors (SWZ_X..SWZ_W).
    SDValue NewArgs[8] = {
      N->getOperand(0), // Chain
      SDValue(),
      N->getOperand(2), // ArrayBase
      N->getOperand(3), // Type
      N->getOperand(4), // SWZ_X
      N->getOperand(5), // SWZ_Y
      N->getOperand(6), // SWZ_Z
      N->getOperand(7) // SWZ_W
    };
    SDLoc DL(N);
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
    return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
  }
  case AMDGPUISD::TEXTURE_FETCH: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    // Same as EXPORT: canonicalize the coordinate swizzle (operands 2..5).
    SDValue NewArgs[19] = {
      N->getOperand(0),
      N->getOperand(1),
      N->getOperand(2),
      N->getOperand(3),
      N->getOperand(4),
      N->getOperand(5),
      N->getOperand(6),
      N->getOperand(7),
      N->getOperand(8),
      N->getOperand(9),
      N->getOperand(10),
      N->getOperand(11),
      N->getOperand(12),
      N->getOperand(13),
      N->getOperand(14),
      N->getOperand(15),
      N->getOperand(16),
      N->getOperand(17),
      N->getOperand(18),
    };
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
    return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
        NewArgs);
  }
  }

  return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002051
/// \brief Try to fold the machine node \p Src (operand \p SrcIdx of
/// \p ParentNode) directly into the parent's operand fields.
///
/// Handles four producers:
///   - FNEG_R600 / FABS_R600: strip the modifier node and set the parent's
///     neg/abs flag operand instead.
///   - CONST_COPY: turn the copy into a direct ALU_CONST register read,
///     provided the combined set of constants still fits the hardware's
///     constant-read limits.
///   - MOV_IMM_I32 / MOV_IMM_F32: replace with an inline constant register
///     (ZERO/HALF/ONE/ONE_INT) or the shared ALU_LITERAL_X slot.
///
/// \p Neg, \p Abs, \p Sel and \p Imm reference the parent's corresponding
/// operand slots; a null SDValue in one of them means the parent has no such
/// slot and that folding kind is unavailable.  Returns true and updates
/// Src/flag operands in place on success.
static bool
FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
            SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
  if (!Src.isMachineOpcode())
    return false;
  switch (Src.getMachineOpcode()) {
  case AMDGPU::FNEG_R600:
    // Fold fneg into the parent's neg flag (if it has one).
    if (!Neg.getNode())
      return false;
    Src = Src.getOperand(0);
    Neg = DAG.getTargetConstant(1, MVT::i32);
    return true;
  case AMDGPU::FABS_R600:
    // Fold fabs into the parent's abs flag (if it has one).
    if (!Abs.getNode())
      return false;
    Src = Src.getOperand(0);
    Abs = DAG.getTargetConstant(1, MVT::i32);
    return true;
  case AMDGPU::CONST_COPY: {
    unsigned Opcode = ParentNode->getMachineOpcode();
    bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;

    if (!Sel.getNode())
      return false;

    SDValue CstOffset = Src.getOperand(0);
    if (ParentNode->getValueType(0).isVector())
      return false;

    // Gather constants values
    int SrcIndices[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
    };
    std::vector<unsigned> Consts;
    for (int OtherSrcIdx : SrcIndices) {
      int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
      if (OtherSrcIdx < 0 || OtherSelIdx < 0)
        continue;
      // SDNode operands don't include the dst register, so when the
      // machine instruction has one the indices are shifted down by one.
      if (HasDst) {
        OtherSrcIdx--;
        OtherSelIdx--;
      }
      if (RegisterSDNode *Reg =
          dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
        if (Reg->getReg() == AMDGPU::ALU_CONST) {
          ConstantSDNode *Cst
            = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
          Consts.push_back(Cst->getZExtValue());
        }
      }
    }

    // Add the candidate constant and check the hardware read limits.
    ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
    Consts.push_back(Cst->getZExtValue());
    if (!TII->fitsConstReadLimitations(Consts)) {
      return false;
    }

    Sel = CstOffset;
    Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
    return true;
  }
  case AMDGPU::MOV_IMM_I32:
  case AMDGPU::MOV_IMM_F32: {
    unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
    uint64_t ImmValue = 0;


    if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
      // 0.0, 0.5 and 1.0 have dedicated inline-constant registers; anything
      // else goes through the literal slot as a raw bit pattern.
      ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
      float FloatValue = FPC->getValueAPF().convertToFloat();
      if (FloatValue == 0.0) {
        ImmReg = AMDGPU::ZERO;
      } else if (FloatValue == 0.5) {
        ImmReg = AMDGPU::HALF;
      } else if (FloatValue == 1.0) {
        ImmReg = AMDGPU::ONE;
      } else {
        ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
      }
    } else {
      // Integer immediates: 0 and 1 have inline registers too.
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
      uint64_t Value = C->getZExtValue();
      if (Value == 0) {
        ImmReg = AMDGPU::ZERO;
      } else if (Value == 1) {
        ImmReg = AMDGPU::ONE_INT;
      } else {
        ImmValue = Value;
      }
    }

    // Check that we aren't already using an immediate.
    // XXX: It's possible for an instruction to have more than one
    // immediate operand, but this is not supported yet.
    if (ImmReg == AMDGPU::ALU_LITERAL_X) {
      if (!Imm.getNode())
        return false;
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
      assert(C);
      // A nonzero literal slot means another operand already claimed it.
      if (C->getZExtValue())
        return false;
      Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
    }
    Src = DAG.getRegister(ImmReg, MVT::i32);
    return true;
  }
  default:
    return false;
  }
}
2175
2176
2177/// \brief Fold the instructions after selecting them
2178SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2179 SelectionDAG &DAG) const {
2180 const R600InstrInfo *TII =
2181 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
2182 if (!Node->isMachineOpcode())
2183 return Node;
2184 unsigned Opcode = Node->getMachineOpcode();
2185 SDValue FakeOp;
2186
2187 std::vector<SDValue> Ops;
Craig Topper66e588b2014-06-29 00:40:57 +00002188 for (const SDUse &I : Node->ops())
2189 Ops.push_back(I);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002190
2191 if (Opcode == AMDGPU::DOT_4) {
2192 int OperandIdx[] = {
2193 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2194 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2195 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2196 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2197 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2198 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2199 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2200 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00002201 };
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002202 int NegIdx[] = {
2203 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
2204 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
2205 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
2206 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
2207 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
2208 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
2209 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
2210 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
2211 };
2212 int AbsIdx[] = {
2213 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
2214 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
2215 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
2216 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
2217 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
2218 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
2219 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
2220 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
2221 };
2222 for (unsigned i = 0; i < 8; i++) {
2223 if (OperandIdx[i] < 0)
2224 return Node;
2225 SDValue &Src = Ops[OperandIdx[i] - 1];
2226 SDValue &Neg = Ops[NegIdx[i] - 1];
2227 SDValue &Abs = Ops[AbsIdx[i] - 1];
2228 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2229 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2230 if (HasDst)
2231 SelIdx--;
2232 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002233 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2234 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2235 }
2236 } else if (Opcode == AMDGPU::REG_SEQUENCE) {
2237 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2238 SDValue &Src = Ops[i];
2239 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002240 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2241 }
Vincent Lejeune0167a312013-09-12 23:45:00 +00002242 } else if (Opcode == AMDGPU::CLAMP_R600) {
2243 SDValue Src = Node->getOperand(0);
2244 if (!Src.isMachineOpcode() ||
2245 !TII->hasInstrModifiers(Src.getMachineOpcode()))
2246 return Node;
2247 int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
2248 AMDGPU::OpName::clamp);
2249 if (ClampIdx < 0)
2250 return Node;
2251 std::vector<SDValue> Ops;
2252 unsigned NumOp = Src.getNumOperands();
2253 for(unsigned i = 0; i < NumOp; ++i)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00002254 Ops.push_back(Src.getOperand(i));
Vincent Lejeune0167a312013-09-12 23:45:00 +00002255 Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
2256 return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
2257 Node->getVTList(), Ops);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002258 } else {
2259 if (!TII->hasInstrModifiers(Opcode))
2260 return Node;
2261 int OperandIdx[] = {
2262 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2263 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2264 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
2265 };
2266 int NegIdx[] = {
2267 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
2268 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
2269 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
2270 };
2271 int AbsIdx[] = {
2272 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
2273 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
2274 -1
2275 };
2276 for (unsigned i = 0; i < 3; i++) {
2277 if (OperandIdx[i] < 0)
2278 return Node;
2279 SDValue &Src = Ops[OperandIdx[i] - 1];
2280 SDValue &Neg = Ops[NegIdx[i] - 1];
2281 SDValue FakeAbs;
2282 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2283 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2284 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002285 int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
2286 if (HasDst) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002287 SelIdx--;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002288 ImmIdx--;
2289 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002290 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002291 SDValue &Imm = Ops[ImmIdx];
2292 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002293 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2294 }
2295 }
2296
2297 return Node;
2298}