//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//
14
#include "R600ISelLowering.h"
#include "AMDGPUFrameLowering.h"
#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Function.h"
#include <iterator>

using namespace llvm;
32
Eric Christopher7792e322015-01-30 23:24:40 +000033R600TargetLowering::R600TargetLowering(TargetMachine &TM,
34 const AMDGPUSubtarget &STI)
35 : AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000036 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
37 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
38 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
39 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000040 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
41 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
42
Eric Christopher23a3a7c2015-02-26 00:00:24 +000043 computeRegisterProperties(STI.getRegisterInfo());
Tom Stellard75aadc22012-12-11 21:25:42 +000044
Tom Stellard0351ea22013-09-28 02:50:50 +000045 // Set condition code actions
46 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
47 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000048 setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000049 setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000050 setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
51 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000052 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
53 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
54 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
55 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000056 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
57 setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
58
59 setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
60 setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
61 setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
62 setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
63
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000064 setOperationAction(ISD::FCOS, MVT::f32, Custom);
65 setOperationAction(ISD::FSIN, MVT::f32, Custom);
66
Tom Stellard75aadc22012-12-11 21:25:42 +000067 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000068 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000069
Tom Stellard492ebea2013-03-08 15:37:07 +000070 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
71 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Matt Arsenault1d555c42014-06-23 18:00:55 +000072 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000073
74 setOperationAction(ISD::FSUB, MVT::f32, Expand);
75
76 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
77 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
78 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000079
Tom Stellard75aadc22012-12-11 21:25:42 +000080 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
81 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
82
Tom Stellarde8f9f282013-03-08 15:37:05 +000083 setOperationAction(ISD::SETCC, MVT::i32, Expand);
84 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000085 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
Jan Vesely2cb62ce2014-07-10 22:40:21 +000086 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
87 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000088
Tom Stellard53f2f902013-09-05 18:38:03 +000089 setOperationAction(ISD::SELECT, MVT::i32, Expand);
90 setOperationAction(ISD::SELECT, MVT::f32, Expand);
91 setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
Tom Stellard53f2f902013-09-05 18:38:03 +000092 setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000093
Jan Vesely808fff52015-04-30 17:15:56 +000094 // ADD, SUB overflow.
95 // TODO: turn these into Legal?
96 if (Subtarget->hasCARRY())
97 setOperationAction(ISD::UADDO, MVT::i32, Custom);
98
99 if (Subtarget->hasBORROW())
100 setOperationAction(ISD::USUBO, MVT::i32, Custom);
101
Matt Arsenault4e466652014-04-16 01:41:30 +0000102 // Expand sign extension of vectors
103 if (!Subtarget->hasBFE())
104 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
105
106 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
107 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);
108
109 if (!Subtarget->hasBFE())
110 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
111 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
112 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);
113
114 if (!Subtarget->hasBFE())
115 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
116 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
117 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);
118
119 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
120 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
121 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);
122
123 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
124
125
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000126 // Legalize loads and stores to the private address space.
127 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000128 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000129 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000130
131 // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
132 // spaces, so it is custom lowered to handle those where it isn't.
Ahmed Bougacha2b6917b2015-01-08 00:51:32 +0000133 for (MVT VT : MVT::integer_valuetypes()) {
134 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
135 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
136 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);
Matt Arsenault2a495972014-11-23 02:57:54 +0000137
Ahmed Bougacha2b6917b2015-01-08 00:51:32 +0000138 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
139 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
140 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);
Matt Arsenault2a495972014-11-23 02:57:54 +0000141
Ahmed Bougacha2b6917b2015-01-08 00:51:32 +0000142 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
143 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
144 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
145 }
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000146
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000147 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000148 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000149 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000150 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
Tom Stellardd3ee8c12013-08-16 01:12:06 +0000151 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
152 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000153
Tom Stellard365366f2013-01-23 02:09:06 +0000154 setOperationAction(ISD::LOAD, MVT::i32, Custom);
155 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000156 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
157
Tom Stellard880a80a2014-06-17 16:53:14 +0000158 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
159 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
160 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
161 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
162
163 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
164 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
165 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
166 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
167
Tom Stellard75aadc22012-12-11 21:25:42 +0000168 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +0000169 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +0000170 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +0000171 setTargetDAGCombine(ISD::SELECT_CC);
Quentin Colombete2e05482013-07-30 00:27:16 +0000172 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000173
Jan Vesely25f36272014-06-18 12:27:13 +0000174 // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
175 // to be Legal/Custom in order to avoid library calls.
176 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
Jan Vesely900ff2e2014-06-18 12:27:15 +0000177 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
Jan Veselyecf51332014-06-18 12:27:17 +0000178 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
Jan Vesely25f36272014-06-18 12:27:13 +0000179
Michel Danzer49812b52013-07-10 16:37:07 +0000180 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
181
Matt Arsenaultc4d3d3a2014-06-23 18:00:49 +0000182 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
183 for (MVT VT : ScalarIntVTs) {
184 setOperationAction(ISD::ADDC, VT, Expand);
185 setOperationAction(ISD::SUBC, VT, Expand);
186 setOperationAction(ISD::ADDE, VT, Expand);
187 setOperationAction(ISD::SUBE, VT, Expand);
188 }
189
Tom Stellardfc455472013-08-12 22:33:21 +0000190 setSchedulingPreference(Sched::Source);
Tom Stellard75aadc22012-12-11 21:25:42 +0000191}
192
Tom Stellardc0f0fba2015-10-01 17:51:29 +0000193static inline bool isEOP(MachineBasicBlock::iterator I) {
194 return std::next(I)->getOpcode() == AMDGPU::RETURN;
195}
196
Tom Stellard75aadc22012-12-11 21:25:42 +0000197MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
198 MachineInstr * MI, MachineBasicBlock * BB) const {
199 MachineFunction * MF = BB->getParent();
200 MachineRegisterInfo &MRI = MF->getRegInfo();
201 MachineBasicBlock::iterator I = *MI;
Eric Christopherfc6de422014-08-05 02:39:49 +0000202 const R600InstrInfo *TII =
Eric Christopher7792e322015-01-30 23:24:40 +0000203 static_cast<const R600InstrInfo *>(Subtarget->getInstrInfo());
Tom Stellard75aadc22012-12-11 21:25:42 +0000204
205 switch (MI->getOpcode()) {
Tom Stellardc6f4a292013-08-26 15:05:59 +0000206 default:
Tom Stellard8f9fc202013-11-15 00:12:45 +0000207 // Replace LDS_*_RET instruction that don't have any uses with the
208 // equivalent LDS_*_NORET instruction.
209 if (TII->isLDSRetInstr(MI->getOpcode())) {
Tom Stellard13c68ef2013-09-05 18:38:09 +0000210 int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
211 assert(DstIdx != -1);
212 MachineInstrBuilder NewMI;
Aaron Watry1885e532014-09-11 15:02:54 +0000213 // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
214 // LDS_1A2D support and remove this special case.
215 if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()) ||
216 MI->getOpcode() == AMDGPU::LDS_CMPST_RET)
Tom Stellard8f9fc202013-11-15 00:12:45 +0000217 return BB;
218
219 NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
220 TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
Tom Stellardc6f4a292013-08-26 15:05:59 +0000221 for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
222 NewMI.addOperand(MI->getOperand(i));
223 }
Tom Stellardc6f4a292013-08-26 15:05:59 +0000224 } else {
225 return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
226 }
227 break;
Tom Stellard75aadc22012-12-11 21:25:42 +0000228 case AMDGPU::CLAMP_R600: {
229 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
230 AMDGPU::MOV,
231 MI->getOperand(0).getReg(),
232 MI->getOperand(1).getReg());
233 TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
234 break;
235 }
236
237 case AMDGPU::FABS_R600: {
238 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
239 AMDGPU::MOV,
240 MI->getOperand(0).getReg(),
241 MI->getOperand(1).getReg());
242 TII->addFlag(NewMI, 0, MO_FLAG_ABS);
243 break;
244 }
245
246 case AMDGPU::FNEG_R600: {
247 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
248 AMDGPU::MOV,
249 MI->getOperand(0).getReg(),
250 MI->getOperand(1).getReg());
251 TII->addFlag(NewMI, 0, MO_FLAG_NEG);
252 break;
253 }
254
Tom Stellard75aadc22012-12-11 21:25:42 +0000255 case AMDGPU::MASK_WRITE: {
256 unsigned maskedRegister = MI->getOperand(0).getReg();
257 assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
258 MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
259 TII->addFlag(defInstr, 0, MO_FLAG_MASK);
260 break;
261 }
262
263 case AMDGPU::MOV_IMM_F32:
264 TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
265 MI->getOperand(1).getFPImm()->getValueAPF()
266 .bitcastToAPInt().getZExtValue());
267 break;
268 case AMDGPU::MOV_IMM_I32:
269 TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
270 MI->getOperand(1).getImm());
271 break;
Vincent Lejeune0b72f102013-03-05 15:04:55 +0000272 case AMDGPU::CONST_COPY: {
273 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
274 MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
Tom Stellard02661d92013-06-25 21:22:18 +0000275 TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
Vincent Lejeune0b72f102013-03-05 15:04:55 +0000276 MI->getOperand(1).getImm());
277 break;
278 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000279
280 case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
Tom Stellard0344cdf2013-08-01 15:23:42 +0000281 case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
Tom Stellard75aadc22012-12-11 21:25:42 +0000282 case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000283 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
284 .addOperand(MI->getOperand(0))
285 .addOperand(MI->getOperand(1))
Tom Stellardc0f0fba2015-10-01 17:51:29 +0000286 .addImm(isEOP(I)); // Set End of program bit
Tom Stellard75aadc22012-12-11 21:25:42 +0000287 break;
288 }
Tom Stellarde0e582c2015-10-01 17:51:34 +0000289 case AMDGPU::RAT_STORE_TYPED_eg: {
290 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
291 .addOperand(MI->getOperand(0))
292 .addOperand(MI->getOperand(1))
293 .addOperand(MI->getOperand(2))
294 .addImm(isEOP(I)); // Set End of program bit
295 break;
296 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000297
Tom Stellard75aadc22012-12-11 21:25:42 +0000298 case AMDGPU::TXD: {
299 unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
300 unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000301 MachineOperand &RID = MI->getOperand(4);
302 MachineOperand &SID = MI->getOperand(5);
303 unsigned TextureId = MI->getOperand(6).getImm();
304 unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
305 unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
Tom Stellard75aadc22012-12-11 21:25:42 +0000306
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000307 switch (TextureId) {
308 case 5: // Rect
309 CTX = CTY = 0;
310 break;
311 case 6: // Shadow1D
312 SrcW = SrcZ;
313 break;
314 case 7: // Shadow2D
315 SrcW = SrcZ;
316 break;
317 case 8: // ShadowRect
318 CTX = CTY = 0;
319 SrcW = SrcZ;
320 break;
321 case 9: // 1DArray
322 SrcZ = SrcY;
323 CTZ = 0;
324 break;
325 case 10: // 2DArray
326 CTZ = 0;
327 break;
328 case 11: // Shadow1DArray
329 SrcZ = SrcY;
330 CTZ = 0;
331 break;
332 case 12: // Shadow2DArray
333 CTZ = 0;
334 break;
335 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000336 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
337 .addOperand(MI->getOperand(3))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000338 .addImm(SrcX)
339 .addImm(SrcY)
340 .addImm(SrcZ)
341 .addImm(SrcW)
342 .addImm(0)
343 .addImm(0)
344 .addImm(0)
345 .addImm(0)
346 .addImm(1)
347 .addImm(2)
348 .addImm(3)
349 .addOperand(RID)
350 .addOperand(SID)
351 .addImm(CTX)
352 .addImm(CTY)
353 .addImm(CTZ)
354 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000355 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
356 .addOperand(MI->getOperand(2))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000357 .addImm(SrcX)
358 .addImm(SrcY)
359 .addImm(SrcZ)
360 .addImm(SrcW)
361 .addImm(0)
362 .addImm(0)
363 .addImm(0)
364 .addImm(0)
365 .addImm(1)
366 .addImm(2)
367 .addImm(3)
368 .addOperand(RID)
369 .addOperand(SID)
370 .addImm(CTX)
371 .addImm(CTY)
372 .addImm(CTZ)
373 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000374 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
375 .addOperand(MI->getOperand(0))
376 .addOperand(MI->getOperand(1))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000377 .addImm(SrcX)
378 .addImm(SrcY)
379 .addImm(SrcZ)
380 .addImm(SrcW)
381 .addImm(0)
382 .addImm(0)
383 .addImm(0)
384 .addImm(0)
385 .addImm(1)
386 .addImm(2)
387 .addImm(3)
388 .addOperand(RID)
389 .addOperand(SID)
390 .addImm(CTX)
391 .addImm(CTY)
392 .addImm(CTZ)
393 .addImm(CTW)
Tom Stellard75aadc22012-12-11 21:25:42 +0000394 .addReg(T0, RegState::Implicit)
395 .addReg(T1, RegState::Implicit);
396 break;
397 }
398
399 case AMDGPU::TXD_SHADOW: {
400 unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
401 unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000402 MachineOperand &RID = MI->getOperand(4);
403 MachineOperand &SID = MI->getOperand(5);
404 unsigned TextureId = MI->getOperand(6).getImm();
405 unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
406 unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
407
408 switch (TextureId) {
409 case 5: // Rect
410 CTX = CTY = 0;
411 break;
412 case 6: // Shadow1D
413 SrcW = SrcZ;
414 break;
415 case 7: // Shadow2D
416 SrcW = SrcZ;
417 break;
418 case 8: // ShadowRect
419 CTX = CTY = 0;
420 SrcW = SrcZ;
421 break;
422 case 9: // 1DArray
423 SrcZ = SrcY;
424 CTZ = 0;
425 break;
426 case 10: // 2DArray
427 CTZ = 0;
428 break;
429 case 11: // Shadow1DArray
430 SrcZ = SrcY;
431 CTZ = 0;
432 break;
433 case 12: // Shadow2DArray
434 CTZ = 0;
435 break;
436 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000437
438 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
439 .addOperand(MI->getOperand(3))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000440 .addImm(SrcX)
441 .addImm(SrcY)
442 .addImm(SrcZ)
443 .addImm(SrcW)
444 .addImm(0)
445 .addImm(0)
446 .addImm(0)
447 .addImm(0)
448 .addImm(1)
449 .addImm(2)
450 .addImm(3)
451 .addOperand(RID)
452 .addOperand(SID)
453 .addImm(CTX)
454 .addImm(CTY)
455 .addImm(CTZ)
456 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000457 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
458 .addOperand(MI->getOperand(2))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000459 .addImm(SrcX)
460 .addImm(SrcY)
461 .addImm(SrcZ)
462 .addImm(SrcW)
463 .addImm(0)
464 .addImm(0)
465 .addImm(0)
466 .addImm(0)
467 .addImm(1)
468 .addImm(2)
469 .addImm(3)
470 .addOperand(RID)
471 .addOperand(SID)
472 .addImm(CTX)
473 .addImm(CTY)
474 .addImm(CTZ)
475 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000476 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
477 .addOperand(MI->getOperand(0))
478 .addOperand(MI->getOperand(1))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000479 .addImm(SrcX)
480 .addImm(SrcY)
481 .addImm(SrcZ)
482 .addImm(SrcW)
483 .addImm(0)
484 .addImm(0)
485 .addImm(0)
486 .addImm(0)
487 .addImm(1)
488 .addImm(2)
489 .addImm(3)
490 .addOperand(RID)
491 .addOperand(SID)
492 .addImm(CTX)
493 .addImm(CTY)
494 .addImm(CTZ)
495 .addImm(CTW)
Tom Stellard75aadc22012-12-11 21:25:42 +0000496 .addReg(T0, RegState::Implicit)
497 .addReg(T1, RegState::Implicit);
498 break;
499 }
500
501 case AMDGPU::BRANCH:
502 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000503 .addOperand(MI->getOperand(0));
Tom Stellard75aadc22012-12-11 21:25:42 +0000504 break;
505
506 case AMDGPU::BRANCH_COND_f32: {
507 MachineInstr *NewMI =
508 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
509 AMDGPU::PREDICATE_BIT)
510 .addOperand(MI->getOperand(1))
511 .addImm(OPCODE_IS_NOT_ZERO)
512 .addImm(0); // Flags
513 TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000514 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
Tom Stellard75aadc22012-12-11 21:25:42 +0000515 .addOperand(MI->getOperand(0))
516 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
517 break;
518 }
519
520 case AMDGPU::BRANCH_COND_i32: {
521 MachineInstr *NewMI =
522 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
523 AMDGPU::PREDICATE_BIT)
524 .addOperand(MI->getOperand(1))
525 .addImm(OPCODE_IS_NOT_ZERO_INT)
526 .addImm(0); // Flags
527 TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000528 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
Tom Stellard75aadc22012-12-11 21:25:42 +0000529 .addOperand(MI->getOperand(0))
530 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
531 break;
532 }
533
Tom Stellard75aadc22012-12-11 21:25:42 +0000534 case AMDGPU::EG_ExportSwz:
535 case AMDGPU::R600_ExportSwz: {
Tom Stellard6f1b8652013-01-23 21:39:49 +0000536 // Instruction is left unmodified if its not the last one of its type
537 bool isLastInstructionOfItsType = true;
538 unsigned InstExportType = MI->getOperand(1).getImm();
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +0000539 for (MachineBasicBlock::iterator NextExportInst = std::next(I),
Tom Stellard6f1b8652013-01-23 21:39:49 +0000540 EndBlock = BB->end(); NextExportInst != EndBlock;
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +0000541 NextExportInst = std::next(NextExportInst)) {
Tom Stellard6f1b8652013-01-23 21:39:49 +0000542 if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
543 NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
544 unsigned CurrentInstExportType = NextExportInst->getOperand(1)
545 .getImm();
546 if (CurrentInstExportType == InstExportType) {
547 isLastInstructionOfItsType = false;
548 break;
549 }
550 }
551 }
Tom Stellardc0f0fba2015-10-01 17:51:29 +0000552 bool EOP = isEOP(I);
Tom Stellard6f1b8652013-01-23 21:39:49 +0000553 if (!EOP && !isLastInstructionOfItsType)
Tom Stellard75aadc22012-12-11 21:25:42 +0000554 return BB;
555 unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
556 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
557 .addOperand(MI->getOperand(0))
558 .addOperand(MI->getOperand(1))
559 .addOperand(MI->getOperand(2))
560 .addOperand(MI->getOperand(3))
561 .addOperand(MI->getOperand(4))
562 .addOperand(MI->getOperand(5))
563 .addOperand(MI->getOperand(6))
564 .addImm(CfInst)
Tom Stellard6f1b8652013-01-23 21:39:49 +0000565 .addImm(EOP);
Tom Stellard75aadc22012-12-11 21:25:42 +0000566 break;
567 }
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000568 case AMDGPU::RETURN: {
569 // RETURN instructions must have the live-out registers as implicit uses,
570 // otherwise they appear dead.
571 R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
572 MachineInstrBuilder MIB(*MF, MI);
573 for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
574 MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
575 return BB;
576 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000577 }
578
579 MI->eraseFromParent();
580 return BB;
581}
582
//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//
586
Tom Stellard75aadc22012-12-11 21:25:42 +0000587SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000588 MachineFunction &MF = DAG.getMachineFunction();
589 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard75aadc22012-12-11 21:25:42 +0000590 switch (Op.getOpcode()) {
591 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Tom Stellard880a80a2014-06-17 16:53:14 +0000592 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
593 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
Jan Vesely25f36272014-06-18 12:27:13 +0000594 case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
Jan Veselyecf51332014-06-18 12:27:17 +0000595 case ISD::SRA_PARTS:
Jan Vesely900ff2e2014-06-18 12:27:15 +0000596 case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
Jan Vesely808fff52015-04-30 17:15:56 +0000597 case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
598 case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000599 case ISD::FCOS:
600 case ISD::FSIN: return LowerTrig(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000601 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000602 case ISD::STORE: return LowerSTORE(Op, DAG);
Matt Arsenaultd2c9e082014-07-07 18:34:45 +0000603 case ISD::LOAD: {
604 SDValue Result = LowerLOAD(Op, DAG);
605 assert((!Result.getNode() ||
606 Result.getNode()->getNumValues() == 2) &&
607 "Load should return a value and a chain");
608 return Result;
609 }
610
Matt Arsenault1d555c42014-06-23 18:00:55 +0000611 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000612 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
Matt Arsenault81d06012016-03-07 21:10:13 +0000613 case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000614 case ISD::INTRINSIC_VOID: {
615 SDValue Chain = Op.getOperand(0);
616 unsigned IntrinsicID =
617 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
618 switch (IntrinsicID) {
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000619 case AMDGPUIntrinsic::R600_store_swizzle: {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000620 SDLoc DL(Op);
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000621 const SDValue Args[8] = {
622 Chain,
623 Op.getOperand(2), // Export Value
624 Op.getOperand(3), // ArrayBase
625 Op.getOperand(4), // Type
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000626 DAG.getConstant(0, DL, MVT::i32), // SWZ_X
627 DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
628 DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
629 DAG.getConstant(3, DL, MVT::i32) // SWZ_W
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000630 };
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000631 return DAG.getNode(AMDGPUISD::EXPORT, DL, Op.getValueType(), Args);
Tom Stellard75aadc22012-12-11 21:25:42 +0000632 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000633
Tom Stellard75aadc22012-12-11 21:25:42 +0000634 // default for switch(IntrinsicID)
635 default: break;
636 }
637 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
638 break;
639 }
640 case ISD::INTRINSIC_WO_CHAIN: {
641 unsigned IntrinsicID =
642 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
643 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000644 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000645 switch(IntrinsicID) {
646 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeunef143af32013-11-11 22:10:24 +0000647 case AMDGPUIntrinsic::R600_interp_xy:
648 case AMDGPUIntrinsic::R600_interp_zw: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000649 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Tom Stellard41afe6a2013-02-05 17:09:14 +0000650 MachineSDNode *interp;
Vincent Lejeunef143af32013-11-11 22:10:24 +0000651 SDValue RegisterINode = Op.getOperand(2);
652 SDValue RegisterJNode = Op.getOperand(3);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000653
Vincent Lejeunef143af32013-11-11 22:10:24 +0000654 if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
Tom Stellard41afe6a2013-02-05 17:09:14 +0000655 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000656 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000657 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000658 else
659 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000660 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000661 RegisterJNode, RegisterINode);
Vincent Lejeunef143af32013-11-11 22:10:24 +0000662 return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
663 SDValue(interp, 0), SDValue(interp, 1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000664 }
Matt Arsenault59bd3012016-01-22 19:00:09 +0000665 case AMDGPUIntrinsic::r600_tex:
666 case AMDGPUIntrinsic::r600_texc:
667 case AMDGPUIntrinsic::r600_txl:
668 case AMDGPUIntrinsic::r600_txlc:
669 case AMDGPUIntrinsic::r600_txb:
670 case AMDGPUIntrinsic::r600_txbc:
671 case AMDGPUIntrinsic::r600_txf:
672 case AMDGPUIntrinsic::r600_txq:
673 case AMDGPUIntrinsic::r600_ddx:
674 case AMDGPUIntrinsic::r600_ddy:
Vincent Lejeune6df39432013-10-02 16:00:33 +0000675 case AMDGPUIntrinsic::R600_ldptr: {
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000676 unsigned TextureOp;
677 switch (IntrinsicID) {
Matt Arsenault59bd3012016-01-22 19:00:09 +0000678 case AMDGPUIntrinsic::r600_tex:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000679 TextureOp = 0;
680 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000681 case AMDGPUIntrinsic::r600_texc:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000682 TextureOp = 1;
683 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000684 case AMDGPUIntrinsic::r600_txl:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000685 TextureOp = 2;
686 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000687 case AMDGPUIntrinsic::r600_txlc:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000688 TextureOp = 3;
689 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000690 case AMDGPUIntrinsic::r600_txb:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000691 TextureOp = 4;
692 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000693 case AMDGPUIntrinsic::r600_txbc:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000694 TextureOp = 5;
695 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000696 case AMDGPUIntrinsic::r600_txf:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000697 TextureOp = 6;
698 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000699 case AMDGPUIntrinsic::r600_txq:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000700 TextureOp = 7;
701 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000702 case AMDGPUIntrinsic::r600_ddx:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000703 TextureOp = 8;
704 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000705 case AMDGPUIntrinsic::r600_ddy:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000706 TextureOp = 9;
707 break;
Vincent Lejeune6df39432013-10-02 16:00:33 +0000708 case AMDGPUIntrinsic::R600_ldptr:
709 TextureOp = 10;
710 break;
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000711 default:
712 llvm_unreachable("Unknow Texture Operation");
713 }
714
715 SDValue TexArgs[19] = {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000716 DAG.getConstant(TextureOp, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000717 Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000718 DAG.getConstant(0, DL, MVT::i32),
719 DAG.getConstant(1, DL, MVT::i32),
720 DAG.getConstant(2, DL, MVT::i32),
721 DAG.getConstant(3, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000722 Op.getOperand(2),
723 Op.getOperand(3),
724 Op.getOperand(4),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000725 DAG.getConstant(0, DL, MVT::i32),
726 DAG.getConstant(1, DL, MVT::i32),
727 DAG.getConstant(2, DL, MVT::i32),
728 DAG.getConstant(3, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000729 Op.getOperand(5),
730 Op.getOperand(6),
731 Op.getOperand(7),
732 Op.getOperand(8),
733 Op.getOperand(9),
734 Op.getOperand(10)
735 };
Craig Topper48d114b2014-04-26 18:35:24 +0000736 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000737 }
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000738 case AMDGPUIntrinsic::AMDGPU_dp4: {
739 SDValue Args[8] = {
740 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000741 DAG.getConstant(0, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000742 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000743 DAG.getConstant(0, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000744 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000745 DAG.getConstant(1, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000746 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000747 DAG.getConstant(1, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000748 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000749 DAG.getConstant(2, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000750 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000751 DAG.getConstant(2, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000752 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000753 DAG.getConstant(3, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000754 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000755 DAG.getConstant(3, DL, MVT::i32))
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000756 };
Craig Topper48d114b2014-04-26 18:35:24 +0000757 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000758 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000759
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000760 case Intrinsic::r600_read_ngroups_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000761 return LowerImplicitParameter(DAG, VT, DL, 0);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000762 case Intrinsic::r600_read_ngroups_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000763 return LowerImplicitParameter(DAG, VT, DL, 1);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000764 case Intrinsic::r600_read_ngroups_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000765 return LowerImplicitParameter(DAG, VT, DL, 2);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000766 case Intrinsic::r600_read_global_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000767 return LowerImplicitParameter(DAG, VT, DL, 3);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000768 case Intrinsic::r600_read_global_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000769 return LowerImplicitParameter(DAG, VT, DL, 4);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000770 case Intrinsic::r600_read_global_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000771 return LowerImplicitParameter(DAG, VT, DL, 5);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000772 case Intrinsic::r600_read_local_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000773 return LowerImplicitParameter(DAG, VT, DL, 6);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000774 case Intrinsic::r600_read_local_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000775 return LowerImplicitParameter(DAG, VT, DL, 7);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000776 case Intrinsic::r600_read_local_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000777 return LowerImplicitParameter(DAG, VT, DL, 8);
778
Matt Arsenaultbef34e22016-01-22 21:30:34 +0000779 case Intrinsic::r600_read_workdim:
780 case AMDGPUIntrinsic::AMDGPU_read_workdim: { // Legacy name.
Tom Stellarddcb9f092015-07-09 21:20:37 +0000781 uint32_t ByteOffset = getImplicitParameterOffset(MFI, GRID_DIM);
782 return LowerImplicitParameter(DAG, VT, DL, ByteOffset / 4);
783 }
Jan Veselye5121f32014-10-14 20:05:26 +0000784
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000785 case Intrinsic::r600_read_tgid_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000786 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
787 AMDGPU::T1_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000788 case Intrinsic::r600_read_tgid_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000789 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
790 AMDGPU::T1_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000791 case Intrinsic::r600_read_tgid_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000792 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
793 AMDGPU::T1_Z, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000794 case Intrinsic::r600_read_tidig_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000795 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
796 AMDGPU::T0_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000797 case Intrinsic::r600_read_tidig_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000798 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
799 AMDGPU::T0_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000800 case Intrinsic::r600_read_tidig_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000801 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
802 AMDGPU::T0_Z, VT);
Matt Arsenaultbef34e22016-01-22 21:30:34 +0000803
804 // FIXME: Should be renamed to r600 prefix
Matt Arsenault0c3e2332016-01-26 04:14:16 +0000805 case AMDGPUIntrinsic::AMDGPU_rsq_clamped:
Matt Arsenault79963e82016-02-13 01:03:00 +0000806 return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
Matt Arsenaultbef34e22016-01-22 21:30:34 +0000807
808 case Intrinsic::r600_rsq:
Matt Arsenault0c3e2332016-01-26 04:14:16 +0000809 case AMDGPUIntrinsic::AMDGPU_rsq: // Legacy name
Matt Arsenault257d48d2014-06-24 22:13:39 +0000810 // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
811 return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000812 }
813 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
814 break;
815 }
816 } // end switch(Op.getOpcode())
817 return SDValue();
818}
819
/// Replace the results of illegal-typed node \p N with legal equivalents,
/// pushing the replacement values into \p Results. Falls back to the
/// AMDGPU-common handler for opcodes not special-cased here.
void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
    return;
  case ISD::FP_TO_UINT:
    // i1 results get a dedicated lowering (fp != 0.0 compare).
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
      return;
    }
    // Fall-through. Since we don't care about out of bounds values
    // we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint
    // considers some extra cases which are not necessary here.
  case ISD::FP_TO_SINT: {
    SDValue Result;
    if (expandFP_TO_SINT(N, Result, DAG))
      Results.push_back(Result);
    return;
  }
  case ISD::SDIVREM: {
    // NOTE(review): wraps result #1 of N before handing it to LowerSDIVREM —
    // presumably what that helper expects; confirm against its definition.
    SDValue Op = SDValue(N, 1);
    SDValue RES = LowerSDIVREM(Op, DAG);
    // Quotient and remainder replacements, in that order.
    Results.push_back(RES);
    Results.push_back(RES.getValue(1));
    break;
  }
  case ISD::UDIVREM: {
    // LowerUDIVREM64 appends both replacement values itself.
    SDValue Op = SDValue(N, 0);
    LowerUDIVREM64(Op, DAG, Results);
    break;
  }
  }
}
855
Tom Stellard880a80a2014-06-17 16:53:14 +0000856SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
857 SDValue Vector) const {
858
859 SDLoc DL(Vector);
860 EVT VecVT = Vector.getValueType();
861 EVT EltVT = VecVT.getVectorElementType();
862 SmallVector<SDValue, 8> Args;
863
864 for (unsigned i = 0, e = VecVT.getVectorNumElements();
865 i != e; ++i) {
Mehdi Amini44ede332015-07-09 02:09:04 +0000866 Args.push_back(DAG.getNode(
867 ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
868 DAG.getConstant(i, DL, getVectorIdxTy(DAG.getDataLayout()))));
Tom Stellard880a80a2014-06-17 16:53:14 +0000869 }
870
871 return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
872}
873
874SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
875 SelectionDAG &DAG) const {
876
877 SDLoc DL(Op);
878 SDValue Vector = Op.getOperand(0);
879 SDValue Index = Op.getOperand(1);
880
881 if (isa<ConstantSDNode>(Index) ||
882 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
883 return Op;
884
885 Vector = vectorToVerticalVector(DAG, Vector);
886 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
887 Vector, Index);
888}
889
890SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
891 SelectionDAG &DAG) const {
892 SDLoc DL(Op);
893 SDValue Vector = Op.getOperand(0);
894 SDValue Value = Op.getOperand(1);
895 SDValue Index = Op.getOperand(2);
896
897 if (isa<ConstantSDNode>(Index) ||
898 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
899 return Op;
900
901 Vector = vectorToVerticalVector(DAG, Vector);
902 SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
903 Vector, Value, Index);
904 return vectorToVerticalVector(DAG, Insert);
905}
906
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000907SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
908 // On hw >= R700, COS/SIN input must be between -1. and 1.
909 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
910 EVT VT = Op.getValueType();
911 SDValue Arg = Op.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000912 SDLoc DL(Op);
Sanjay Patela2607012015-09-16 16:31:21 +0000913
914 // TODO: Should this propagate fast-math-flags?
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000915 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
916 DAG.getNode(ISD::FADD, DL, VT,
917 DAG.getNode(ISD::FMUL, DL, VT, Arg,
918 DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
919 DAG.getConstantFP(0.5, DL, MVT::f32)));
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000920 unsigned TrigNode;
921 switch (Op.getOpcode()) {
922 case ISD::FCOS:
923 TrigNode = AMDGPUISD::COS_HW;
924 break;
925 case ISD::FSIN:
926 TrigNode = AMDGPUISD::SIN_HW;
927 break;
928 default:
929 llvm_unreachable("Wrong trig opcode");
930 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000931 SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
932 DAG.getNode(ISD::FADD, DL, VT, FractPart,
933 DAG.getConstantFP(-0.5, DL, MVT::f32)));
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000934 if (Gen >= AMDGPUSubtarget::R700)
935 return TrigVal;
936 // On R600 hw, COS/SIN input must be between -Pi and Pi.
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000937 return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
938 DAG.getConstantFP(3.14159265359, DL, MVT::f32));
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000939}
940
Jan Vesely25f36272014-06-18 12:27:13 +0000941SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
942 SDLoc DL(Op);
943 EVT VT = Op.getValueType();
944
945 SDValue Lo = Op.getOperand(0);
946 SDValue Hi = Op.getOperand(1);
947 SDValue Shift = Op.getOperand(2);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000948 SDValue Zero = DAG.getConstant(0, DL, VT);
949 SDValue One = DAG.getConstant(1, DL, VT);
Jan Vesely25f36272014-06-18 12:27:13 +0000950
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000951 SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
952 SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
Jan Vesely25f36272014-06-18 12:27:13 +0000953 SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
954 SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
955
956 // The dance around Width1 is necessary for 0 special case.
957 // Without it the CompShift might be 32, producing incorrect results in
958 // Overflow. So we do the shift in two steps, the alternative is to
959 // add a conditional to filter the special case.
960
961 SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
962 Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);
963
964 SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
965 HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
966 SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);
967
968 SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
969 SDValue LoBig = Zero;
970
971 Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
972 Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
973
974 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
975}
976
Jan Vesely900ff2e2014-06-18 12:27:15 +0000977SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
978 SDLoc DL(Op);
979 EVT VT = Op.getValueType();
980
981 SDValue Lo = Op.getOperand(0);
982 SDValue Hi = Op.getOperand(1);
983 SDValue Shift = Op.getOperand(2);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000984 SDValue Zero = DAG.getConstant(0, DL, VT);
985 SDValue One = DAG.getConstant(1, DL, VT);
Jan Vesely900ff2e2014-06-18 12:27:15 +0000986
Jan Veselyecf51332014-06-18 12:27:17 +0000987 const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;
988
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000989 SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
990 SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
Jan Vesely900ff2e2014-06-18 12:27:15 +0000991 SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
992 SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
993
994 // The dance around Width1 is necessary for 0 special case.
995 // Without it the CompShift might be 32, producing incorrect results in
996 // Overflow. So we do the shift in two steps, the alternative is to
997 // add a conditional to filter the special case.
998
999 SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
1000 Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);
1001
Jan Veselyecf51332014-06-18 12:27:17 +00001002 SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
Jan Vesely900ff2e2014-06-18 12:27:15 +00001003 SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
1004 LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);
1005
Jan Veselyecf51332014-06-18 12:27:17 +00001006 SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
1007 SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;
Jan Vesely900ff2e2014-06-18 12:27:15 +00001008
1009 Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
1010 Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
1011
1012 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
1013}
1014
Jan Vesely808fff52015-04-30 17:15:56 +00001015SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
1016 unsigned mainop, unsigned ovf) const {
1017 SDLoc DL(Op);
1018 EVT VT = Op.getValueType();
1019
1020 SDValue Lo = Op.getOperand(0);
1021 SDValue Hi = Op.getOperand(1);
1022
1023 SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
1024 // Extend sign.
1025 OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
1026 DAG.getValueType(MVT::i1));
1027
1028 SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);
1029
1030 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
1031}
1032
Tom Stellard75aadc22012-12-11 21:25:42 +00001033SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001034 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001035 return DAG.getNode(
1036 ISD::SETCC,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001037 DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001038 MVT::i1,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001039 Op, DAG.getConstantFP(0.0f, DL, MVT::f32),
Tom Stellard75aadc22012-12-11 21:25:42 +00001040 DAG.getCondCode(ISD::SETNE)
1041 );
1042}
1043
Tom Stellard75aadc22012-12-11 21:25:42 +00001044SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001045 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001046 unsigned DwordOffset) const {
1047 unsigned ByteOffset = DwordOffset * 4;
1048 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +00001049 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +00001050
1051 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
1052 assert(isInt<16>(ByteOffset));
1053
1054 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001055 DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
Tom Stellard75aadc22012-12-11 21:25:42 +00001056 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
1057 false, false, false, 0);
1058}
1059
Tom Stellard75aadc22012-12-11 21:25:42 +00001060bool R600TargetLowering::isZero(SDValue Op) const {
1061 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
1062 return Cst->isNullValue();
1063 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
1064 return CstFP->isZero();
1065 } else {
1066 return false;
1067 }
1068}
1069
/// Lower SELECT_CC toward forms the R600 hardware can match natively:
/// first try a min/max combine, then a SET* pattern, then a CND* pattern
/// (by canonicalizing the zero operand to the RHS), and finally fall back
/// to a pair of SELECT_CC operations.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // For f32, see if this select is really a legacy fmin/fmax.
  if (VT == MVT::f32) {
    DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
    SDValue MinMax = CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
    if (MinMax)
      return MinMax;
  }

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand. If the values
  // are reversed (HW-true in the False slot), invert the condition — or
  // invert and swap the compare operands — so a SET* can match.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
     ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        // Inverting the condition also requires swapping True/False.
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    // Only equality-with-zero and ordered compares are handled by CND*;
    // rewrite the "not equal" forms as their inverse with swapped arms.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, DL, CompareVT);
    HWFalse = DAG.getConstant(0, DL, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
1211
Alp Tokercb402912014-01-24 17:20:08 +00001212/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001213/// convert these pointers to a register index. Each register holds
1214/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
1215/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
1216/// for indirect addressing.
1217SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1218 unsigned StackWidth,
1219 SelectionDAG &DAG) const {
1220 unsigned SRLPad;
1221 switch(StackWidth) {
1222 case 1:
1223 SRLPad = 2;
1224 break;
1225 case 2:
1226 SRLPad = 3;
1227 break;
1228 case 4:
1229 SRLPad = 4;
1230 break;
1231 default: llvm_unreachable("Invalid stack width");
1232 }
1233
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001234 SDLoc DL(Ptr);
1235 return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
1236 DAG.getConstant(SRLPad, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001237}
1238
1239void R600TargetLowering::getStackAddress(unsigned StackWidth,
1240 unsigned ElemIdx,
1241 unsigned &Channel,
1242 unsigned &PtrIncr) const {
1243 switch (StackWidth) {
1244 default:
1245 case 1:
1246 Channel = 0;
1247 if (ElemIdx > 0) {
1248 PtrIncr = 1;
1249 } else {
1250 PtrIncr = 0;
1251 }
1252 break;
1253 case 2:
1254 Channel = ElemIdx % 2;
1255 if (ElemIdx == 2) {
1256 PtrIncr = 1;
1257 } else {
1258 PtrIncr = 0;
1259 }
1260 break;
1261 case 4:
1262 Channel = ElemIdx;
1263 PtrIncr = 0;
1264 break;
1265 }
1266}
1267
Matt Arsenault95245662016-02-11 05:32:46 +00001268SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
1269 SelectionDAG &DAG) const {
1270 SDLoc DL(Store);
Tom Stellard75aadc22012-12-11 21:25:42 +00001271
Matt Arsenault95245662016-02-11 05:32:46 +00001272 unsigned Mask = 0;
1273 if (Store->getMemoryVT() == MVT::i8) {
1274 Mask = 0xff;
1275 } else if (Store->getMemoryVT() == MVT::i16) {
1276 Mask = 0xffff;
1277 }
1278
1279 SDValue Chain = Store->getChain();
1280 SDValue BasePtr = Store->getBasePtr();
1281 EVT MemVT = Store->getMemoryVT();
1282
1283 SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, BasePtr,
1284 DAG.getConstant(2, DL, MVT::i32));
1285 SDValue Dst = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
1286 Chain, Ptr,
1287 DAG.getTargetConstant(0, DL, MVT::i32));
1288
1289 SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, BasePtr,
1290 DAG.getConstant(0x3, DL, MVT::i32));
1291
1292 SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1293 DAG.getConstant(3, DL, MVT::i32));
1294
1295 SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
1296 Store->getValue());
1297
1298 SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);
1299
1300 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
1301 MaskedValue, ShiftAmt);
1302
1303 SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32,
1304 DAG.getConstant(Mask, DL, MVT::i32),
1305 ShiftAmt);
1306 DstMask = DAG.getNode(ISD::XOR, DL, MVT::i32, DstMask,
1307 DAG.getConstant(0xffffffff, DL, MVT::i32));
1308 Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
1309
1310 SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
1311 return DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1312 Chain, Value, Ptr,
1313 DAG.getTargetConstant(0, DL, MVT::i32));
1314}
1315
1316SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1317 if (SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG))
Tom Stellardfbab8272013-08-16 01:12:11 +00001318 return Result;
Tom Stellardfbab8272013-08-16 01:12:11 +00001319
Matt Arsenault95245662016-02-11 05:32:46 +00001320 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1321 unsigned AS = StoreNode->getAddressSpace();
1322 SDValue Value = StoreNode->getValue();
1323 EVT ValueVT = Value.getValueType();
1324
1325 if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
1326 ValueVT.isVector()) {
1327 return SplitVectorStore(Op, DAG);
1328 }
1329
1330 SDLoc DL(Op);
1331 SDValue Chain = StoreNode->getChain();
1332 SDValue Ptr = StoreNode->getBasePtr();
1333
1334 if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001335 if (StoreNode->isTruncatingStore()) {
1336 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001337 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001338 EVT MemVT = StoreNode->getMemoryVT();
1339 SDValue MaskConstant;
1340 if (MemVT == MVT::i8) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001341 MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001342 } else {
1343 assert(MemVT == MVT::i16);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001344 MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001345 }
1346 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001347 DAG.getConstant(2, DL, MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001348 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001349 DAG.getConstant(0x00000003, DL, VT));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001350 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1351 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001352 DAG.getConstant(3, DL, VT));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001353 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1354 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
1355 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1356 // vector instead.
1357 SDValue Src[4] = {
1358 ShiftedValue,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001359 DAG.getConstant(0, DL, MVT::i32),
1360 DAG.getConstant(0, DL, MVT::i32),
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001361 Mask
1362 };
Craig Topper48d114b2014-04-26 18:35:24 +00001363 SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001364 SDValue Args[3] = { Chain, Input, DWordAddr };
1365 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
Craig Topper206fcd42014-04-26 19:29:41 +00001366 Op->getVTList(), Args, MemVT,
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001367 StoreNode->getMemOperand());
1368 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
Matt Arsenault95245662016-02-11 05:32:46 +00001369 ValueVT.bitsGE(MVT::i32)) {
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001370 // Convert pointer from byte address to dword address.
1371 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1372 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001373 Ptr, DAG.getConstant(2, DL, MVT::i32)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001374
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001375 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
Matt Arsenaulteaa3a7e2013-12-10 21:37:42 +00001376 llvm_unreachable("Truncated and indexed stores not supported yet");
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001377 } else {
1378 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1379 }
1380 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001381 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001382 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001383
Matt Arsenault95245662016-02-11 05:32:46 +00001384 if (AS != AMDGPUAS::PRIVATE_ADDRESS)
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001385 return SDValue();
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001386
Matt Arsenault95245662016-02-11 05:32:46 +00001387 EVT MemVT = StoreNode->getMemoryVT();
1388 if (MemVT.bitsLT(MVT::i32))
1389 return lowerPrivateTruncStore(StoreNode, DAG);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001390
Ahmed Bougachaf8dfb472016-02-09 22:54:12 +00001391 // Lowering for indirect addressing
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001392 const MachineFunction &MF = DAG.getMachineFunction();
Eric Christopher7792e322015-01-30 23:24:40 +00001393 const AMDGPUFrameLowering *TFL =
1394 static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001395 unsigned StackWidth = TFL->getStackWidth(MF);
1396
1397 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1398
1399 if (ValueVT.isVector()) {
1400 unsigned NumElemVT = ValueVT.getVectorNumElements();
1401 EVT ElemVT = ValueVT.getVectorElementType();
Craig Topper48d114b2014-04-26 18:35:24 +00001402 SmallVector<SDValue, 4> Stores(NumElemVT);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001403
1404 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1405 "vector width in load");
1406
1407 for (unsigned i = 0; i < NumElemVT; ++i) {
1408 unsigned Channel, PtrIncr;
1409 getStackAddress(StackWidth, i, Channel, PtrIncr);
1410 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001411 DAG.getConstant(PtrIncr, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001412 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001413 Value, DAG.getConstant(i, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001414
1415 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1416 Chain, Elem, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001417 DAG.getTargetConstant(Channel, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001418 }
Craig Topper48d114b2014-04-26 18:35:24 +00001419 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001420 } else {
1421 if (ValueVT == MVT::i8) {
1422 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1423 }
1424 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001425 DAG.getTargetConstant(0, DL, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001426 }
1427
1428 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001429}
1430
Tom Stellard365366f2013-01-23 02:09:06 +00001431// return (512 + (kc_bank << 12)
1432static int
1433ConstantAddressBlock(unsigned AddressSpace) {
1434 switch (AddressSpace) {
1435 case AMDGPUAS::CONSTANT_BUFFER_0:
1436 return 512;
1437 case AMDGPUAS::CONSTANT_BUFFER_1:
1438 return 512 + 4096;
1439 case AMDGPUAS::CONSTANT_BUFFER_2:
1440 return 512 + 4096 * 2;
1441 case AMDGPUAS::CONSTANT_BUFFER_3:
1442 return 512 + 4096 * 3;
1443 case AMDGPUAS::CONSTANT_BUFFER_4:
1444 return 512 + 4096 * 4;
1445 case AMDGPUAS::CONSTANT_BUFFER_5:
1446 return 512 + 4096 * 5;
1447 case AMDGPUAS::CONSTANT_BUFFER_6:
1448 return 512 + 4096 * 6;
1449 case AMDGPUAS::CONSTANT_BUFFER_7:
1450 return 512 + 4096 * 7;
1451 case AMDGPUAS::CONSTANT_BUFFER_8:
1452 return 512 + 4096 * 8;
1453 case AMDGPUAS::CONSTANT_BUFFER_9:
1454 return 512 + 4096 * 9;
1455 case AMDGPUAS::CONSTANT_BUFFER_10:
1456 return 512 + 4096 * 10;
1457 case AMDGPUAS::CONSTANT_BUFFER_11:
1458 return 512 + 4096 * 11;
1459 case AMDGPUAS::CONSTANT_BUFFER_12:
1460 return 512 + 4096 * 12;
1461 case AMDGPUAS::CONSTANT_BUFFER_13:
1462 return 512 + 4096 * 13;
1463 case AMDGPUAS::CONSTANT_BUFFER_14:
1464 return 512 + 4096 * 14;
1465 case AMDGPUAS::CONSTANT_BUFFER_15:
1466 return 512 + 4096 * 15;
1467 default:
1468 return -1;
1469 }
1470}
1471
Matt Arsenault6dfda962016-02-10 18:21:39 +00001472SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
1473 SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001474 SDLoc DL(Op);
Matt Arsenault6dfda962016-02-10 18:21:39 +00001475 LoadSDNode *Load = cast<LoadSDNode>(Op);
1476 ISD::LoadExtType ExtType = Load->getExtensionType();
1477 EVT MemVT = Load->getMemoryVT();
Tom Stellard365366f2013-01-23 02:09:06 +00001478
Matt Arsenault6dfda962016-02-10 18:21:39 +00001479 // <SI && AS=PRIVATE && EXTLOAD && size < 32bit,
1480 // register (2-)byte extract.
1481
1482 // Get Register holding the target.
1483 SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Load->getBasePtr(),
1484 DAG.getConstant(2, DL, MVT::i32));
1485 // Load the Register.
1486 SDValue Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(),
1487 Load->getChain(),
1488 Ptr,
1489 DAG.getTargetConstant(0, DL, MVT::i32),
1490 Op.getOperand(2));
1491
1492 // Get offset within the register.
1493 SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
1494 Load->getBasePtr(),
1495 DAG.getConstant(0x3, DL, MVT::i32));
1496
1497 // Bit offset of target byte (byteIdx * 8).
1498 SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1499 DAG.getConstant(3, DL, MVT::i32));
1500
1501 // Shift to the right.
1502 Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Ret, ShiftAmt);
1503
1504 // Eliminate the upper bits by setting them to ...
1505 EVT MemEltVT = MemVT.getScalarType();
1506
1507 // ... ones.
1508 if (ExtType == ISD::SEXTLOAD) {
1509 SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
1510
1511 SDValue Ops[] = {
1512 DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode),
1513 Load->getChain()
1514 };
1515
1516 return DAG.getMergeValues(Ops, DL);
1517 }
1518
1519 // ... or zeros.
1520 SDValue Ops[] = {
1521 DAG.getZeroExtendInReg(Ret, DL, MemEltVT),
1522 Load->getChain()
1523 };
1524
1525 return DAG.getMergeValues(Ops, DL);
1526}
1527
/// \brief Custom lowering for ISD::LOAD.
///
/// Dispatches on the load's address space and extension type:
///  - sub-dword extending loads from private memory go through
///    lowerPrivateExtLoad,
///  - constant-address loads of global variables and constant-buffer loads
///    become REGISTER_LOAD / CONST_ADDRESS nodes,
///  - vector loads from local memory are scalarized,
///  - remaining private-address loads use register-indexed (indirect)
///    addressing; anything else is returned unchanged for the generic
///    legalizer.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  unsigned AS = LoadNode->getAddressSpace();
  EVT MemVT = LoadNode->getMemoryVT();
  ISD::LoadExtType ExtType = LoadNode->getExtensionType();

  // Sub-32-bit extending loads from private memory need a dword load plus
  // shift/mask expansion.
  if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
      ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
    return lowerPrivateExtLoad(Op, DAG);
  }

  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue Chain = LoadNode->getChain();
  SDValue Ptr = LoadNode->getBasePtr();

  // Lower loads constant address space global variable loads
  if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
      isa<GlobalVariable>(GetUnderlyingObject(
          LoadNode->getMemOperand()->getValue(), DAG.getDataLayout()))) {

    // Note: intentionally shadows the outer Ptr; this copy is converted to
    // a dword register index below.
    SDValue Ptr = DAG.getZExtOrTrunc(
        LoadNode->getBasePtr(), DL,
        getPointerTy(DAG.getDataLayout(), AMDGPUAS::PRIVATE_ADDRESS));
    // Byte address -> dword address.
    Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
                      DAG.getConstant(2, DL, MVT::i32));
    return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op->getVTList(),
                       LoadNode->getChain(), Ptr,
                       DAG.getTargetConstant(0, DL, MVT::i32),
                       Op.getOperand(2));
  }

  // Vector loads from local memory are split into per-element loads.
  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      ScalarizeVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Constant-buffer loads (non-extending or zero-extending only; see the
  // SEXTLOAD note further down).
  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    // A statically-known address can be folded into CONST_ADDRESS nodes,
    // one per channel.
    if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
        isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT,
                           makeArrayRef(Slots, NumElements));
    } else {
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
                      DAG.getConstant(4, DL, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
          );
    }

    // Scalar loads keep only the first channel of the v4 result.
    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, DL, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  SDValue LoweredLoad;

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    // Note: shadows the outer MemVT with the same value.
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    // Expand as EXTLOAD + explicit in-register sign extension. The trailing
    // arguments are the legacy flags: volatile, non-temporal, invariant,
    // alignment.
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                     LoadNode->getPointerInfo(), MemVT,
                                     LoadNode->isVolatile(),
                                     LoadNode->isNonTemporal(),
                                     LoadNode->isInvariant(),
                                     LoadNode->getAlignment());
    SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
                              DAG.getValueType(MemVT));

    SDValue MergedValues[2] = { Res, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Everything below is private-address indirect addressing; leave other
  // address spaces to the generic legalizer.
  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL =
      static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  // Convert the byte pointer into a register index (see stackPtrToRegIndex).
  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    // One REGISTER_LOAD per element; the channel/pointer increment depends
    // on the configured stack width.
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, DL, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, DL, MVT::i32),
                             Op.getOperand(2));
    }
    // Pad the result out to a full 4-element vector with undef lanes.
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, DL, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2] = {
    LoweredLoad,
    Chain
  };

  return DAG.getMergeValues(Ops, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001691
Matt Arsenault1d555c42014-06-23 18:00:55 +00001692SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1693 SDValue Chain = Op.getOperand(0);
1694 SDValue Cond = Op.getOperand(1);
1695 SDValue Jump = Op.getOperand(2);
1696
1697 return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1698 Chain, Jump, Cond);
1699}
1700
Matt Arsenault81d06012016-03-07 21:10:13 +00001701SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
1702 SelectionDAG &DAG) const {
1703 MachineFunction &MF = DAG.getMachineFunction();
1704 const AMDGPUFrameLowering *TFL = Subtarget->getFrameLowering();
1705
1706 FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
1707
1708 unsigned FrameIndex = FIN->getIndex();
1709 unsigned IgnoredFrameReg;
1710 unsigned Offset =
1711 TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
1712 return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), SDLoc(Op),
1713 Op.getValueType());
1714}
1715
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
///
/// For compute shaders, each argument is loaded from constant buffer 0,
/// starting at byte offset 36 (the first 36 bytes hold thread-group and
/// global-size information — see the comment in the loop). For other shader
/// types, arguments arrive pre-loaded in 128-bit registers.
SDValue R600TargetLowering::LowerFormalArguments(
    SDValue Chain,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    SDLoc DL, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();

  SmallVector<ISD::InputArg, 8> LocalIns;

  // Recover the pre-legalization argument list, then run the calling
  // convention analysis on it to fill in ArgLocs.
  getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);

  AnalyzeFormalArguments(CCInfo, LocalIns);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    const ISD::InputArg &In = Ins[i];
    EVT VT = In.VT;
    EVT MemVT = VA.getLocVT();
    if (!VT.isVector() && MemVT.isVector()) {
      // Get load source type if scalarized.
      MemVT = MemVT.getVectorElementType();
    }

    // Non-compute shaders receive their inputs in registers rather than
    // from the constant buffer.
    if (MFI->getShaderType() != ShaderType::COMPUTE) {
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
    // being invalid. Somehow this seems to work with i64 arguments, but breaks
    // for <1 x i64>.

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.
    ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
    if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
      // FIXME: This should really check the extload type, but the handling of
      // extload vector parameters seems to be broken.

      // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
      Ext = ISD::SEXTLOAD;
    }

    // Compute the offset from the value.
    // XXX - I think PartOffset should give you this, but it seems to give the
    // size of the register which isn't useful.

    unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
    unsigned PartOffset = VA.getLocMemOffset();
    // 36 bytes of dispatch info precede the user arguments (see above).
    unsigned Offset = 36 + VA.getLocMemOffset();

    // The pointer-info offset is relative to the start of the original
    // (possibly split) argument.
    MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
    // Trailing flags: volatile=false, non-temporal=true, invariant=true,
    // alignment=4.
    SDValue Arg = DAG.getLoad(ISD::UNINDEXED, Ext, VT, DL, Chain,
                              DAG.getConstant(Offset, DL, MVT::i32),
                              DAG.getUNDEF(MVT::i32),
                              PtrInfo,
                              MemVT, false, true, true, 4);

    // 4 is the preferred alignment for the CONSTANT memory space.
    InVals.push_back(Arg);
    // Record where the next argument would start; used for implicit
    // parameter offsets.
    MFI->ABIArgOffset = Offset + MemVT.getStoreSize();
  }
  return Chain;
}
1795
Mehdi Amini44ede332015-07-09 02:09:04 +00001796EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1797 EVT VT) const {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001798 if (!VT.isVector())
1799 return MVT::i32;
Tom Stellard75aadc22012-12-11 21:25:42 +00001800 return VT.changeVectorElementTypeToInteger();
1801}
1802
Matt Arsenaultfa67bdb2016-02-22 21:04:16 +00001803bool R600TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
1804 unsigned AddrSpace,
1805 unsigned Align,
1806 bool *IsFast) const {
1807 if (IsFast)
1808 *IsFast = false;
1809
1810 if (!VT.isSimple() || VT == MVT::Other)
1811 return false;
1812
1813 if (VT.bitsLT(MVT::i32))
1814 return false;
1815
1816 // TODO: This is a rough estimate.
1817 if (IsFast)
1818 *IsFast = true;
1819
1820 return VT.bitsGT(MVT::i32) && Align % 4 == 0;
1821}
1822
Matt Arsenault209a7b92014-04-18 07:40:20 +00001823static SDValue CompactSwizzlableVector(
1824 SelectionDAG &DAG, SDValue VectorEntry,
1825 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001826 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1827 assert(RemapSwizzle.empty());
1828 SDValue NewBldVec[4] = {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001829 VectorEntry.getOperand(0),
1830 VectorEntry.getOperand(1),
1831 VectorEntry.getOperand(2),
1832 VectorEntry.getOperand(3)
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001833 };
1834
1835 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeunefa58a5f2013-10-13 17:56:10 +00001836 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1837 // We mask write here to teach later passes that the ith element of this
1838 // vector is undef. Thus we can use it to reduce 128 bits reg usage,
1839 // break false dependencies and additionnaly make assembly easier to read.
1840 RemapSwizzle[i] = 7; // SEL_MASK_WRITE
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001841 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1842 if (C->isZero()) {
1843 RemapSwizzle[i] = 4; // SEL_0
1844 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1845 } else if (C->isExactlyValue(1.0)) {
1846 RemapSwizzle[i] = 5; // SEL_1
1847 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1848 }
1849 }
1850
1851 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1852 continue;
1853 for (unsigned j = 0; j < i; j++) {
1854 if (NewBldVec[i] == NewBldVec[j]) {
1855 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1856 RemapSwizzle[i] = j;
1857 break;
1858 }
1859 }
1860 }
1861
1862 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001863 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001864}
1865
Benjamin Kramer193960c2013-06-11 13:32:25 +00001866static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1867 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001868 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1869 assert(RemapSwizzle.empty());
1870 SDValue NewBldVec[4] = {
1871 VectorEntry.getOperand(0),
1872 VectorEntry.getOperand(1),
1873 VectorEntry.getOperand(2),
1874 VectorEntry.getOperand(3)
1875 };
1876 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001877 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001878 RemapSwizzle[i] = i;
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001879 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1880 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1881 ->getZExtValue();
1882 if (i == Idx)
1883 isUnmovable[Idx] = true;
1884 }
1885 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001886
1887 for (unsigned i = 0; i < 4; i++) {
1888 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1889 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1890 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001891 if (isUnmovable[Idx])
1892 continue;
1893 // Swap i and Idx
1894 std::swap(NewBldVec[Idx], NewBldVec[i]);
1895 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1896 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001897 }
1898 }
1899
1900 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001901 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001902}
1903
1904
1905SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001906 SDValue Swz[4], SelectionDAG &DAG,
1907 SDLoc DL) const {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001908 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1909 // Old -> New swizzle values
1910 DenseMap<unsigned, unsigned> SwizzleRemap;
1911
1912 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1913 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001914 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001915 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001916 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001917 }
1918
1919 SwizzleRemap.clear();
1920 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1921 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001922 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001923 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001924 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001925 }
1926
1927 return BuildVector;
1928}
1929
1930
Tom Stellard75aadc22012-12-11 21:25:42 +00001931//===----------------------------------------------------------------------===//
1932// Custom DAG Optimizations
1933//===----------------------------------------------------------------------===//
1934
1935SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1936 DAGCombinerInfo &DCI) const {
1937 SelectionDAG &DAG = DCI.DAG;
1938
1939 switch (N->getOpcode()) {
Tom Stellard50122a52014-04-07 19:45:41 +00001940 default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00001941 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1942 case ISD::FP_ROUND: {
1943 SDValue Arg = N->getOperand(0);
1944 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001945 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001946 Arg.getOperand(0));
1947 }
1948 break;
1949 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001950
1951 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1952 // (i32 select_cc f32, f32, -1, 0 cc)
1953 //
1954 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1955 // this to one of the SET*_DX10 instructions.
1956 case ISD::FP_TO_SINT: {
1957 SDValue FNeg = N->getOperand(0);
1958 if (FNeg.getOpcode() != ISD::FNEG) {
1959 return SDValue();
1960 }
1961 SDValue SelectCC = FNeg.getOperand(0);
1962 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1963 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1964 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1965 !isHWTrueValue(SelectCC.getOperand(2)) ||
1966 !isHWFalseValue(SelectCC.getOperand(3))) {
1967 return SDValue();
1968 }
1969
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001970 SDLoc dl(N);
1971 return DAG.getNode(ISD::SELECT_CC, dl, N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001972 SelectCC.getOperand(0), // LHS
1973 SelectCC.getOperand(1), // RHS
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001974 DAG.getConstant(-1, dl, MVT::i32), // True
1975 DAG.getConstant(0, dl, MVT::i32), // False
Tom Stellarde06163a2013-02-07 14:02:35 +00001976 SelectCC.getOperand(4)); // CC
1977
1978 break;
1979 }
Quentin Colombete2e05482013-07-30 00:27:16 +00001980
NAKAMURA Takumi8a046432013-10-28 04:07:38 +00001981 // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1982 // => build_vector elt0, ... , NewEltIdx, ... , eltN
Quentin Colombete2e05482013-07-30 00:27:16 +00001983 case ISD::INSERT_VECTOR_ELT: {
1984 SDValue InVec = N->getOperand(0);
1985 SDValue InVal = N->getOperand(1);
1986 SDValue EltNo = N->getOperand(2);
1987 SDLoc dl(N);
1988
1989 // If the inserted element is an UNDEF, just use the input vector.
1990 if (InVal.getOpcode() == ISD::UNDEF)
1991 return InVec;
1992
1993 EVT VT = InVec.getValueType();
1994
1995 // If we can't generate a legal BUILD_VECTOR, exit
1996 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1997 return SDValue();
1998
1999 // Check that we know which element is being inserted
2000 if (!isa<ConstantSDNode>(EltNo))
2001 return SDValue();
2002 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
2003
2004 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
2005 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
2006 // vector elements.
2007 SmallVector<SDValue, 8> Ops;
2008 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
2009 Ops.append(InVec.getNode()->op_begin(),
2010 InVec.getNode()->op_end());
2011 } else if (InVec.getOpcode() == ISD::UNDEF) {
2012 unsigned NElts = VT.getVectorNumElements();
2013 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
2014 } else {
2015 return SDValue();
2016 }
2017
2018 // Insert the element
2019 if (Elt < Ops.size()) {
2020 // All the operands of BUILD_VECTOR must have the same type;
2021 // we enforce that here.
2022 EVT OpVT = Ops[0].getValueType();
2023 if (InVal.getValueType() != OpVT)
2024 InVal = OpVT.bitsGT(InVal.getValueType()) ?
2025 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
2026 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
2027 Ops[Elt] = InVal;
2028 }
2029
2030 // Return the new vector
Craig Topper48d114b2014-04-26 18:35:24 +00002031 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
Quentin Colombete2e05482013-07-30 00:27:16 +00002032 }
2033
Tom Stellard365366f2013-01-23 02:09:06 +00002034 // Extract_vec (Build_vector) generated by custom lowering
2035 // also needs to be customly combined
2036 case ISD::EXTRACT_VECTOR_ELT: {
2037 SDValue Arg = N->getOperand(0);
2038 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
2039 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2040 unsigned Element = Const->getZExtValue();
2041 return Arg->getOperand(Element);
2042 }
2043 }
Tom Stellarddd04c832013-01-31 22:11:53 +00002044 if (Arg.getOpcode() == ISD::BITCAST &&
2045 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
2046 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2047 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00002048 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00002049 Arg->getOperand(0).getOperand(Element));
2050 }
2051 }
Mehdi Aminie029eae2015-07-16 06:23:12 +00002052 break;
Tom Stellard365366f2013-01-23 02:09:06 +00002053 }
Tom Stellarde06163a2013-02-07 14:02:35 +00002054
2055 case ISD::SELECT_CC: {
Tom Stellardafa8b532014-05-09 16:42:16 +00002056 // Try common optimizations
Ahmed Bougachaf8dfb472016-02-09 22:54:12 +00002057 if (SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI))
Tom Stellardafa8b532014-05-09 16:42:16 +00002058 return Ret;
2059
Tom Stellarde06163a2013-02-07 14:02:35 +00002060 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
2061 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00002062 //
2063 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
2064 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00002065 SDValue LHS = N->getOperand(0);
2066 if (LHS.getOpcode() != ISD::SELECT_CC) {
2067 return SDValue();
2068 }
2069
2070 SDValue RHS = N->getOperand(1);
2071 SDValue True = N->getOperand(2);
2072 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00002073 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00002074
2075 if (LHS.getOperand(2).getNode() != True.getNode() ||
2076 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00002077 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00002078 return SDValue();
2079 }
2080
Tom Stellard5e524892013-03-08 15:37:11 +00002081 switch (NCC) {
2082 default: return SDValue();
2083 case ISD::SETNE: return LHS;
2084 case ISD::SETEQ: {
2085 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
2086 LHSCC = ISD::getSetCCInverse(LHSCC,
2087 LHS.getOperand(0).getValueType().isInteger());
Tom Stellardcd428182013-09-28 02:50:38 +00002088 if (DCI.isBeforeLegalizeOps() ||
2089 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
2090 return DAG.getSelectCC(SDLoc(N),
2091 LHS.getOperand(0),
2092 LHS.getOperand(1),
2093 LHS.getOperand(2),
2094 LHS.getOperand(3),
2095 LHSCC);
2096 break;
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002097 }
Tom Stellard5e524892013-03-08 15:37:11 +00002098 }
Tom Stellardcd428182013-09-28 02:50:38 +00002099 return SDValue();
Tom Stellard5e524892013-03-08 15:37:11 +00002100 }
Tom Stellardfbab8272013-08-16 01:12:11 +00002101
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002102 case AMDGPUISD::EXPORT: {
2103 SDValue Arg = N->getOperand(1);
2104 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2105 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002106
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002107 SDValue NewArgs[8] = {
2108 N->getOperand(0), // Chain
2109 SDValue(),
2110 N->getOperand(2), // ArrayBase
2111 N->getOperand(3), // Type
2112 N->getOperand(4), // SWZ_X
2113 N->getOperand(5), // SWZ_Y
2114 N->getOperand(6), // SWZ_Z
2115 N->getOperand(7) // SWZ_W
2116 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00002117 SDLoc DL(N);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002118 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
Craig Topper48d114b2014-04-26 18:35:24 +00002119 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
Tom Stellarde06163a2013-02-07 14:02:35 +00002120 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002121 case AMDGPUISD::TEXTURE_FETCH: {
2122 SDValue Arg = N->getOperand(1);
2123 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2124 break;
2125
2126 SDValue NewArgs[19] = {
2127 N->getOperand(0),
2128 N->getOperand(1),
2129 N->getOperand(2),
2130 N->getOperand(3),
2131 N->getOperand(4),
2132 N->getOperand(5),
2133 N->getOperand(6),
2134 N->getOperand(7),
2135 N->getOperand(8),
2136 N->getOperand(9),
2137 N->getOperand(10),
2138 N->getOperand(11),
2139 N->getOperand(12),
2140 N->getOperand(13),
2141 N->getOperand(14),
2142 N->getOperand(15),
2143 N->getOperand(16),
2144 N->getOperand(17),
2145 N->getOperand(18),
2146 };
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002147 SDLoc DL(N);
2148 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
2149 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002150 }
Tom Stellard75aadc22012-12-11 21:25:42 +00002151 }
Matt Arsenault5565f65e2014-05-22 18:09:07 +00002152
2153 return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00002154}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002155
2156static bool
2157FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002158 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Eric Christopherfc6de422014-08-05 02:39:49 +00002159 const R600InstrInfo *TII =
2160 static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002161 if (!Src.isMachineOpcode())
2162 return false;
2163 switch (Src.getMachineOpcode()) {
2164 case AMDGPU::FNEG_R600:
2165 if (!Neg.getNode())
2166 return false;
2167 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002168 Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002169 return true;
2170 case AMDGPU::FABS_R600:
2171 if (!Abs.getNode())
2172 return false;
2173 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002174 Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002175 return true;
2176 case AMDGPU::CONST_COPY: {
2177 unsigned Opcode = ParentNode->getMachineOpcode();
2178 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2179
2180 if (!Sel.getNode())
2181 return false;
2182
2183 SDValue CstOffset = Src.getOperand(0);
2184 if (ParentNode->getValueType(0).isVector())
2185 return false;
2186
2187 // Gather constants values
2188 int SrcIndices[] = {
2189 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2190 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2191 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
2192 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2193 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2194 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2195 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2196 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2197 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2198 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2199 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
2200 };
2201 std::vector<unsigned> Consts;
Matt Arsenault4d64f962014-05-12 19:23:21 +00002202 for (int OtherSrcIdx : SrcIndices) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002203 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2204 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2205 continue;
2206 if (HasDst) {
2207 OtherSrcIdx--;
2208 OtherSelIdx--;
2209 }
2210 if (RegisterSDNode *Reg =
2211 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2212 if (Reg->getReg() == AMDGPU::ALU_CONST) {
Matt Arsenaultb3ee3882014-05-12 19:26:38 +00002213 ConstantSDNode *Cst
2214 = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002215 Consts.push_back(Cst->getZExtValue());
2216 }
2217 }
2218 }
2219
Matt Arsenault37c12d72014-05-12 20:42:57 +00002220 ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002221 Consts.push_back(Cst->getZExtValue());
2222 if (!TII->fitsConstReadLimitations(Consts)) {
2223 return false;
2224 }
2225
2226 Sel = CstOffset;
2227 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
2228 return true;
2229 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002230 case AMDGPU::MOV_IMM_I32:
2231 case AMDGPU::MOV_IMM_F32: {
2232 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
2233 uint64_t ImmValue = 0;
2234
2235
2236 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
2237 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
2238 float FloatValue = FPC->getValueAPF().convertToFloat();
2239 if (FloatValue == 0.0) {
2240 ImmReg = AMDGPU::ZERO;
2241 } else if (FloatValue == 0.5) {
2242 ImmReg = AMDGPU::HALF;
2243 } else if (FloatValue == 1.0) {
2244 ImmReg = AMDGPU::ONE;
2245 } else {
2246 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2247 }
2248 } else {
2249 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
2250 uint64_t Value = C->getZExtValue();
2251 if (Value == 0) {
2252 ImmReg = AMDGPU::ZERO;
2253 } else if (Value == 1) {
2254 ImmReg = AMDGPU::ONE_INT;
2255 } else {
2256 ImmValue = Value;
2257 }
2258 }
2259
2260 // Check that we aren't already using an immediate.
2261 // XXX: It's possible for an instruction to have more than one
2262 // immediate operand, but this is not supported yet.
2263 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
2264 if (!Imm.getNode())
2265 return false;
2266 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
2267 assert(C);
2268 if (C->getZExtValue())
2269 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002270 Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002271 }
2272 Src = DAG.getRegister(ImmReg, MVT::i32);
2273 return true;
2274 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002275 default:
2276 return false;
2277 }
2278}
2279
2280
2281/// \brief Fold the instructions after selecting them
2282SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2283 SelectionDAG &DAG) const {
Eric Christopherfc6de422014-08-05 02:39:49 +00002284 const R600InstrInfo *TII =
2285 static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002286 if (!Node->isMachineOpcode())
2287 return Node;
2288 unsigned Opcode = Node->getMachineOpcode();
2289 SDValue FakeOp;
2290
Benjamin Kramer6cd780f2015-02-17 15:29:18 +00002291 std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002292
2293 if (Opcode == AMDGPU::DOT_4) {
2294 int OperandIdx[] = {
2295 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2296 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2297 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2298 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2299 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2300 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2301 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2302 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00002303 };
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002304 int NegIdx[] = {
2305 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
2306 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
2307 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
2308 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
2309 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
2310 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
2311 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
2312 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
2313 };
2314 int AbsIdx[] = {
2315 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
2316 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
2317 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
2318 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
2319 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
2320 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
2321 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
2322 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
2323 };
2324 for (unsigned i = 0; i < 8; i++) {
2325 if (OperandIdx[i] < 0)
2326 return Node;
2327 SDValue &Src = Ops[OperandIdx[i] - 1];
2328 SDValue &Neg = Ops[NegIdx[i] - 1];
2329 SDValue &Abs = Ops[AbsIdx[i] - 1];
2330 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2331 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2332 if (HasDst)
2333 SelIdx--;
2334 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002335 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2336 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2337 }
2338 } else if (Opcode == AMDGPU::REG_SEQUENCE) {
2339 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2340 SDValue &Src = Ops[i];
2341 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002342 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2343 }
Vincent Lejeune0167a312013-09-12 23:45:00 +00002344 } else if (Opcode == AMDGPU::CLAMP_R600) {
2345 SDValue Src = Node->getOperand(0);
2346 if (!Src.isMachineOpcode() ||
2347 !TII->hasInstrModifiers(Src.getMachineOpcode()))
2348 return Node;
2349 int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
2350 AMDGPU::OpName::clamp);
2351 if (ClampIdx < 0)
2352 return Node;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002353 SDLoc DL(Node);
Benjamin Kramer6cd780f2015-02-17 15:29:18 +00002354 std::vector<SDValue> Ops(Src->op_begin(), Src->op_end());
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002355 Ops[ClampIdx - 1] = DAG.getTargetConstant(1, DL, MVT::i32);
2356 return DAG.getMachineNode(Src.getMachineOpcode(), DL,
2357 Node->getVTList(), Ops);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002358 } else {
2359 if (!TII->hasInstrModifiers(Opcode))
2360 return Node;
2361 int OperandIdx[] = {
2362 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2363 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2364 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
2365 };
2366 int NegIdx[] = {
2367 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
2368 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
2369 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
2370 };
2371 int AbsIdx[] = {
2372 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
2373 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
2374 -1
2375 };
2376 for (unsigned i = 0; i < 3; i++) {
2377 if (OperandIdx[i] < 0)
2378 return Node;
2379 SDValue &Src = Ops[OperandIdx[i] - 1];
2380 SDValue &Neg = Ops[NegIdx[i] - 1];
2381 SDValue FakeAbs;
2382 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2383 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2384 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002385 int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
2386 if (HasDst) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002387 SelIdx--;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002388 ImmIdx--;
2389 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002390 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002391 SDValue &Imm = Ops[ImmIdx];
2392 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002393 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2394 }
2395 }
2396
2397 return Node;
2398}