//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "AMDGPUFrameLowering.h"
#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Function.h"

using namespace llvm;

R600TargetLowering::R600TargetLowering(TargetMachine &TM,
                                       const AMDGPUSubtarget &STI)
    : AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) {
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
  addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);

  computeRegisterProperties(STI.getRegisterInfo());

  // Set condition code actions
  setCondCodeAction(ISD::SETO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::f32, Expand);

  setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::i32, Expand);

  setOperationAction(ISD::FCOS, MVT::f32, Custom);
  setOperationAction(ISD::FSIN, MVT::f32, Custom);

  setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
  setOperationAction(ISD::SETCC, MVT::v2i32, Expand);

  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
  setOperationAction(ISD::SELECT, MVT::v4i32, Expand);

  // ADD, SUB overflow.
  // TODO: turn these into Legal?
  if (Subtarget->hasCARRY())
    setOperationAction(ISD::UADDO, MVT::i32, Custom);

  if (Subtarget->hasBORROW())
    setOperationAction(ISD::USUBO, MVT::i32, Custom);

  // Expand sign extension of vectors
  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);


  // Legalize loads and stores to the private address space.
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);

  // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
  // spaces, so it is custom lowered to handle those where it isn't.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
  }

  setOperationAction(ISD::STORE, MVT::i8, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);
  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
  setTruncStoreAction(MVT::i32, MVT::i16, Custom);

  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);

  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);

  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::FP_TO_SINT);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::SELECT_CC);
  setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);

  // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
  // to be Legal/Custom in order to avoid library calls.
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);

  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
  for (MVT VT : ScalarIntVTs) {
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::SUBE, VT, Expand);
  }

  setSchedulingPreference(Sched::Source);
}

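// Returns true when the instruction that follows \p I is the block's RETURN,
// i.e. \p I is the last real instruction of the program.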
static inline bool isEOP(MachineBasicBlock::iterator I) {
  return std::next(I)->getOpcode() == AMDGPU::RETURN;
}

MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(Subtarget->getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // Replace LDS_*_RET instructions that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI->getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
      // LDS_1A2D support and remove this special case.
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()) ||
          MI->getOpcode() == AMDGPU::LDS_CMPST_RET)
        return BB;

      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
                       MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(isEOP(I)); // Set End of program bit
    break;
  }
  case AMDGPU::RAT_STORE_TYPED_eg: {
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addOperand(MI->getOperand(2))
        .addImm(isEOP(I)); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

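    // Adjust the source swizzle and the coordinate-type flags (CTX/CTY/CTZ/CTW,
    // presumably normalized vs. unnormalized) for the texture target: Rect
    // targets use unnormalized X/Y, shadow targets route the Z channel (the
    // compare value) into W, and array targets keep the array index
    // unnormalized.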
    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
        .addOperand(MI->getOperand(3))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
        .addOperand(MI->getOperand(2))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW)
        .addReg(T0, RegState::Implicit)
        .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
        .addOperand(MI->getOperand(3))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
        .addOperand(MI->getOperand(2))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW)
        .addReg(T0, RegState::Implicit)
        .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
        .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .addOperand(MI->getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO_INT)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .addOperand(MI->getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // The instruction is left unmodified if it is not the last one of its type.
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = isEOP(I);
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addOperand(MI->getOperand(2))
        .addOperand(MI->getOperand(3))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5))
        .addOperand(MI->getOperand(6))
        .addImm(CfInst)
        .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  MI->eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
  case ISD::SRA_PARTS:
  case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
  case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
  case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
  case ISD::FCOS:
  case ISD::FSIN: return LowerTrig(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: {
    SDValue Result = LowerLOAD(Op, DAG);
    assert((!Result.getNode() ||
            Result.getNode()->getNumValues() == 2) &&
           "Load should return a value and a chain");
    return Result;
  }

  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::R600_store_swizzle: {
      SDLoc DL(Op);
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, DL, MVT::i32), // SWZ_X
        DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
        DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
        DAG.getConstant(3, DL, MVT::i32) // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::EXPORT, DL, Op.getValueType(), Args);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch(IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_interp_xy:
    case AMDGPUIntrinsic::R600_interp_zw: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      MachineSDNode *interp;
      SDValue RegisterINode = Op.getOperand(2);
      SDValue RegisterJNode = Op.getOperand(3);

      if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
            RegisterJNode, RegisterINode);
      return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
          SDValue(interp, 0), SDValue(interp, 1));
    }
    case AMDGPUIntrinsic::r600_tex:
    case AMDGPUIntrinsic::r600_texc:
    case AMDGPUIntrinsic::r600_txl:
    case AMDGPUIntrinsic::r600_txlc:
    case AMDGPUIntrinsic::r600_txb:
    case AMDGPUIntrinsic::r600_txbc:
    case AMDGPUIntrinsic::r600_txf:
    case AMDGPUIntrinsic::r600_txq:
    case AMDGPUIntrinsic::r600_ddx:
    case AMDGPUIntrinsic::r600_ddy:
    case AMDGPUIntrinsic::R600_ldptr: {
      unsigned TextureOp;
      switch (IntrinsicID) {
      case AMDGPUIntrinsic::r600_tex:
        TextureOp = 0;
        break;
      case AMDGPUIntrinsic::r600_texc:
        TextureOp = 1;
        break;
      case AMDGPUIntrinsic::r600_txl:
        TextureOp = 2;
        break;
      case AMDGPUIntrinsic::r600_txlc:
        TextureOp = 3;
        break;
      case AMDGPUIntrinsic::r600_txb:
        TextureOp = 4;
        break;
      case AMDGPUIntrinsic::r600_txbc:
        TextureOp = 5;
        break;
      case AMDGPUIntrinsic::r600_txf:
        TextureOp = 6;
        break;
      case AMDGPUIntrinsic::r600_txq:
        TextureOp = 7;
        break;
      case AMDGPUIntrinsic::r600_ddx:
        TextureOp = 8;
        break;
      case AMDGPUIntrinsic::r600_ddy:
        TextureOp = 9;
        break;
      case AMDGPUIntrinsic::R600_ldptr:
        TextureOp = 10;
        break;
      default:
        llvm_unreachable("Unknown Texture Operation");
      }

      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, DL, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(1, DL, MVT::i32),
        DAG.getConstant(2, DL, MVT::i32),
        DAG.getConstant(3, DL, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(1, DL, MVT::i32),
        DAG.getConstant(2, DL, MVT::i32),
        DAG.getConstant(3, DL, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
    }
    case AMDGPUIntrinsic::AMDGPU_dp4: {
      SDValue Args[8] = {
          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
                      DAG.getConstant(0, DL, MVT::i32)),
          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
                      DAG.getConstant(0, DL, MVT::i32)),
          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
                      DAG.getConstant(1, DL, MVT::i32)),
          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
                      DAG.getConstant(1, DL, MVT::i32)),
          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
                      DAG.getConstant(2, DL, MVT::i32)),
          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
                      DAG.getConstant(2, DL, MVT::i32)),
          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
                      DAG.getConstant(3, DL, MVT::i32)),
          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
                      DAG.getConstant(3, DL, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
    }

    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    case Intrinsic::r600_read_workdim:
    case AMDGPUIntrinsic::AMDGPU_read_workdim: { // Legacy name.
      uint32_t ByteOffset = getImplicitParameterOffset(MFI, GRID_DIM);
      return LowerImplicitParameter(DAG, VT, DL, ByteOffset / 4);
    }

    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);

    // FIXME: Should be renamed to r600 prefix
    case AMDGPUIntrinsic::AMDGPU_rsq_clamped:
      return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));

    case Intrinsic::r600_rsq:
    case AMDGPUIntrinsic::AMDGPU_rsq: // Legacy name
      // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
      return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
    return;
  case ISD::FP_TO_UINT:
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
      return;
    }
    // Fall-through. Since we don't care about out of bounds values
    // we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint
    // considers some extra cases which are not necessary here.
  case ISD::FP_TO_SINT: {
    SDValue Result;
    if (expandFP_TO_SINT(N, Result, DAG))
      Results.push_back(Result);
    return;
  }
  case ISD::SDIVREM: {
    SDValue Op = SDValue(N, 1);
    SDValue RES = LowerSDIVREM(Op, DAG);
    Results.push_back(RES);
    Results.push_back(RES.getValue(1));
    break;
  }
  case ISD::UDIVREM: {
    SDValue Op = SDValue(N, 0);
    LowerUDIVREM64(Op, DAG, Results);
    break;
  }
  }
}

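// Rebuilds \p Vector as an AMDGPUISD::BUILD_VERTICAL_VECTOR of its extracted
// elements. The lowerings below use this for non-constant element indices,
// where the vector presumably needs a register layout that indirect addressing
// can index.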
SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
                                                   SDValue Vector) const {

  SDLoc DL(Vector);
  EVT VecVT = Vector.getValueType();
  EVT EltVT = VecVT.getVectorElementType();
  SmallVector<SDValue, 8> Args;

  for (unsigned i = 0, e = VecVT.getVectorNumElements();
       i != e; ++i) {
    Args.push_back(DAG.getNode(
        ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
        DAG.getConstant(i, DL, getVectorIdxTy(DAG.getDataLayout()))));
  }

  return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
}

SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                    SelectionDAG &DAG) const {

  SDLoc DL(Op);
  SDValue Vector = Op.getOperand(0);
  SDValue Index = Op.getOperand(1);

  if (isa<ConstantSDNode>(Index) ||
      Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    return Op;

  Vector = vectorToVerticalVector(DAG, Vector);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
                     Vector, Index);
}

SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vector = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Index = Op.getOperand(2);

  if (isa<ConstantSDNode>(Index) ||
      Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    return Op;

  Vector = vectorToVerticalVector(DAG, Vector);
  SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
                               Vector, Value, Index);
  return vectorToVerticalVector(DAG, Insert);
}

SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
  // On hw >= R700, COS/SIN input must be between -1. and 1.
  // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
  EVT VT = Op.getValueType();
  SDValue Arg = Op.getOperand(0);
  SDLoc DL(Op);

  // TODO: Should this propagate fast-math-flags?
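  // 0.15915494309 is 1/(2*Pi): scale the argument so one full period maps to
  // one unit before taking the fractional part.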
  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT,
        DAG.getNode(ISD::FMUL, DL, VT, Arg,
          DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
        DAG.getConstantFP(0.5, DL, MVT::f32)));
  unsigned TrigNode;
  switch (Op.getOpcode()) {
  case ISD::FCOS:
    TrigNode = AMDGPUISD::COS_HW;
    break;
  case ISD::FSIN:
    TrigNode = AMDGPUISD::SIN_HW;
    break;
  default:
    llvm_unreachable("Wrong trig opcode");
  }
  SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT, FractPart,
        DAG.getConstantFP(-0.5, DL, MVT::f32)));
  if (Gen >= AMDGPUSubtarget::R700)
    return TrigVal;
  // On R600 hw, COS/SIN input must be between -Pi and Pi.
  return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
      DAG.getConstantFP(3.14159265359, DL, MVT::f32));
}

SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One  = DAG.getConstant(1, DL, VT);

  SDValue Width  = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  SDValue BigShift  = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for the Shift == 0 special case.
  // Without it the shift amount used for Overflow could be the full bit width
  // (32), producing an incorrect result. So we do the shift in two steps; the
  // alternative would be to add a conditional to filter out the special case.
959
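  // Overflow holds the Lo bits that spill into the high word: conceptually
  // Lo >> (Width - Shift), computed as (Lo >> (Width1 - Shift)) >> 1 so that
  // neither shift amount can reach the full bit width.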
  SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
  Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);

  SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
  HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
  SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);

  SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
  SDValue LoBig = Zero;

  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
}

SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One  = DAG.getConstant(1, DL, VT);

  const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;

  SDValue Width  = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  SDValue BigShift  = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for the Shift == 0 special case.
  // Without it the shift amount used for Overflow could be the full bit width
  // (32), producing an incorrect result. So we do the shift in two steps; the
  // alternative would be to add a conditional to filter out the special case.

  SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
  Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);

  SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
  SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
  LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);

  SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
  SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;

  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
}

SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
                                          unsigned mainop, unsigned ovf) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);

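  // Despite the names, Lo and Hi here are simply the two operands; the
  // AMDGPUISD::CARRY / AMDGPUISD::BORROW node computes the overflow bit and
  // the main ADD/SUB result is built separately below.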
  SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
  // Extend sign.
  OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
                    DAG.getValueType(MVT::i1));

  SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
}

SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(
      ISD::SETCC,
      DL,
      MVT::i1,
      Op, DAG.getConstantFP(0.0f, DL, MVT::f32),
      DAG.getCondCode(ISD::SETNE)
      );
}

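// Implicit kernel parameters (e.g. grid and group sizes) are read as 32-bit
// dwords from implicit constant buffer 0; \p DwordOffset selects which dword
// to load.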
SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   SDLoc DL,
                                                   unsigned DwordOffset) const {
  unsigned ByteOffset = DwordOffset * 4;
  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                           AMDGPUAS::CONSTANT_BUFFER_0);

  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)),
                     false, false, false, 0);
}

bool R600TargetLowering::isZero(SDValue Op) const {
  if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
    return Cst->isNullValue();
  } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
    return CstFP->isZero();
  } else {
    return false;
  }
}

SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  if (VT == MVT::f32) {
    DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
    SDValue MinMax = CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
    if (MinMax)
      return MinMax;
  }

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
     ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far, it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
1188 SDValue HWTrue, HWFalse;
1189
1190 if (CompareVT == MVT::f32) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001191 HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
1192 HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
Tom Stellard75aadc22012-12-11 21:25:42 +00001193 } else if (CompareVT == MVT::i32) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001194 HWTrue = DAG.getConstant(-1, DL, CompareVT);
1195 HWFalse = DAG.getConstant(0, DL, CompareVT);
Tom Stellard75aadc22012-12-11 21:25:42 +00001196 }
1197 else {
Matt Arsenaulteaa3a7e2013-12-10 21:37:42 +00001198 llvm_unreachable("Unhandled value type in LowerSELECT_CC");
Tom Stellard75aadc22012-12-11 21:25:42 +00001199 }
1200
1201 // Lower this unsupported SELECT_CC into a combination of two supported
1202 // SELECT_CC operations.
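  // For example (illustrative only): select_cc f32 %a, %b, i32 %t, i32 %f, cc
  // first computes %c = select_cc f32 %a, %b, 1.0, 0.0, cc (a SET* pattern),
  // and then emits select_cc f32 %c, 0.0, %t, %f, setne (a CND* pattern).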
1203 SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
1204
1205 return DAG.getNode(ISD::SELECT_CC, DL, VT,
1206 Cond, HWFalse,
1207 True, False,
1208 DAG.getCondCode(ISD::SETNE));
1209}
1210
Alp Tokercb402912014-01-24 17:20:08 +00001211/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001212/// convert these pointers to a register index. Each register holds
1213/// 16 bytes (4 x 32-bit sub-registers), but we need to take into account the
1214/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
1215/// for indirect addressing.
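/// For illustration (assuming a byte pointer of 12): StackWidth 1 uses one
/// 4-byte channel per slot, so the register index is 12 >> 2 == 3; StackWidth
/// 2 gives 12 >> 3 == 1; StackWidth 4 uses the full 16 bytes and gives
/// 12 >> 4 == 0.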
1216SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1217 unsigned StackWidth,
1218 SelectionDAG &DAG) const {
1219 unsigned SRLPad;
1220 switch(StackWidth) {
1221 case 1:
1222 SRLPad = 2;
1223 break;
1224 case 2:
1225 SRLPad = 3;
1226 break;
1227 case 4:
1228 SRLPad = 4;
1229 break;
1230 default: llvm_unreachable("Invalid stack width");
1231 }
1232
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001233 SDLoc DL(Ptr);
1234 return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
1235 DAG.getConstant(SRLPad, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001236}
1237
1238void R600TargetLowering::getStackAddress(unsigned StackWidth,
1239 unsigned ElemIdx,
1240 unsigned &Channel,
1241 unsigned &PtrIncr) const {
1242 switch (StackWidth) {
1243 default:
1244 case 1:
1245 Channel = 0;
1246 if (ElemIdx > 0) {
1247 PtrIncr = 1;
1248 } else {
1249 PtrIncr = 0;
1250 }
1251 break;
1252 case 2:
1253 Channel = ElemIdx % 2;
1254 if (ElemIdx == 2) {
1255 PtrIncr = 1;
1256 } else {
1257 PtrIncr = 0;
1258 }
1259 break;
1260 case 4:
1261 Channel = ElemIdx;
1262 PtrIncr = 0;
1263 break;
1264 }
1265}
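// Illustrative mapping for the StackWidth == 2 case (ElemIdx -> (Channel,
// PtrIncr)): 0 -> (0, 0), 1 -> (1, 0), 2 -> (0, 1), 3 -> (1, 0). The caller
// applies PtrIncr cumulatively, so elements 2 and 3 land in the next register.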
1266
Matt Arsenault95245662016-02-11 05:32:46 +00001267SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
1268 SelectionDAG &DAG) const {
1269 SDLoc DL(Store);
Tom Stellard75aadc22012-12-11 21:25:42 +00001270
Matt Arsenault95245662016-02-11 05:32:46 +00001271 unsigned Mask = 0;
1272 if (Store->getMemoryVT() == MVT::i8) {
1273 Mask = 0xff;
1274 } else if (Store->getMemoryVT() == MVT::i16) {
1275 Mask = 0xffff;
1276 }
1277
1278 SDValue Chain = Store->getChain();
1279 SDValue BasePtr = Store->getBasePtr();
1280 EVT MemVT = Store->getMemoryVT();
1281
1282 SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, BasePtr,
1283 DAG.getConstant(2, DL, MVT::i32));
1284 SDValue Dst = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
1285 Chain, Ptr,
1286 DAG.getTargetConstant(0, DL, MVT::i32));
1287
1288 SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, BasePtr,
1289 DAG.getConstant(0x3, DL, MVT::i32));
1290
1291 SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1292 DAG.getConstant(3, DL, MVT::i32));
1293
1294 SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
1295 Store->getValue());
1296
1297 SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);
1298
1299 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
1300 MaskedValue, ShiftAmt);
1301
1302 SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32,
1303 DAG.getConstant(Mask, DL, MVT::i32),
1304 ShiftAmt);
1305 DstMask = DAG.getNode(ISD::XOR, DL, MVT::i32, DstMask,
1306 DAG.getConstant(0xffffffff, DL, MVT::i32));
1307 Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
1308
1309 SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
1310 return DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1311 Chain, Value, Ptr,
1312 DAG.getTargetConstant(0, DL, MVT::i32));
1313}
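// Worked example (hypothetical values): an i8 store to byte address 6 loads
// the dword at register index 6 >> 2 == 1, computes ByteIdx == 2 and
// ShiftAmt == 16, clears bits [23:16] of the old dword with ~(0xff << 16) and
// ORs in the new byte shifted left by 16 before storing the dword back.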
1314
1315SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1316 if (SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG))
Tom Stellardfbab8272013-08-16 01:12:11 +00001317 return Result;
Tom Stellardfbab8272013-08-16 01:12:11 +00001318
Matt Arsenault95245662016-02-11 05:32:46 +00001319 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1320 unsigned AS = StoreNode->getAddressSpace();
1321 SDValue Value = StoreNode->getValue();
1322 EVT ValueVT = Value.getValueType();
1323
1324 if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
1325 ValueVT.isVector()) {
1326 return SplitVectorStore(Op, DAG);
1327 }
1328
1329 SDLoc DL(Op);
1330 SDValue Chain = StoreNode->getChain();
1331 SDValue Ptr = StoreNode->getBasePtr();
1332
1333 if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001334 if (StoreNode->isTruncatingStore()) {
1335 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001336 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001337 EVT MemVT = StoreNode->getMemoryVT();
1338 SDValue MaskConstant;
1339 if (MemVT == MVT::i8) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001340 MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001341 } else {
1342 assert(MemVT == MVT::i16);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001343 MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001344 }
1345 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001346 DAG.getConstant(2, DL, MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001347 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001348 DAG.getConstant(0x00000003, DL, VT));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001349 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1350 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001351 DAG.getConstant(3, DL, VT));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001352 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1353 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
1354 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1355 // vector instead.
1356 SDValue Src[4] = {
1357 ShiftedValue,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001358 DAG.getConstant(0, DL, MVT::i32),
1359 DAG.getConstant(0, DL, MVT::i32),
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001360 Mask
1361 };
Craig Topper48d114b2014-04-26 18:35:24 +00001362 SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001363 SDValue Args[3] = { Chain, Input, DWordAddr };
1364 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
Craig Topper206fcd42014-04-26 19:29:41 +00001365 Op->getVTList(), Args, MemVT,
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001366 StoreNode->getMemOperand());
1367 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
Matt Arsenault95245662016-02-11 05:32:46 +00001368 ValueVT.bitsGE(MVT::i32)) {
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001369 // Convert pointer from byte address to dword address.
1370 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1371 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001372 Ptr, DAG.getConstant(2, DL, MVT::i32)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001373
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001374 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
Matt Arsenaulteaa3a7e2013-12-10 21:37:42 +00001375 llvm_unreachable("Truncated and indexed stores not supported yet");
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001376 } else {
1377 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1378 }
1379 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001380 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001381 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001382
Matt Arsenault95245662016-02-11 05:32:46 +00001383 if (AS != AMDGPUAS::PRIVATE_ADDRESS)
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001384 return SDValue();
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001385
Matt Arsenault95245662016-02-11 05:32:46 +00001386 EVT MemVT = StoreNode->getMemoryVT();
1387 if (MemVT.bitsLT(MVT::i32))
1388 return lowerPrivateTruncStore(StoreNode, DAG);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001389
Ahmed Bougachaf8dfb472016-02-09 22:54:12 +00001390 // Lowering for indirect addressing
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001391 const MachineFunction &MF = DAG.getMachineFunction();
Eric Christopher7792e322015-01-30 23:24:40 +00001392 const AMDGPUFrameLowering *TFL =
1393 static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001394 unsigned StackWidth = TFL->getStackWidth(MF);
1395
1396 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1397
1398 if (ValueVT.isVector()) {
1399 unsigned NumElemVT = ValueVT.getVectorNumElements();
1400 EVT ElemVT = ValueVT.getVectorElementType();
Craig Topper48d114b2014-04-26 18:35:24 +00001401 SmallVector<SDValue, 4> Stores(NumElemVT);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001402
1403 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1404                                      "vector width in store");
1405
1406 for (unsigned i = 0; i < NumElemVT; ++i) {
1407 unsigned Channel, PtrIncr;
1408 getStackAddress(StackWidth, i, Channel, PtrIncr);
1409 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001410 DAG.getConstant(PtrIncr, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001411 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001412 Value, DAG.getConstant(i, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001413
1414 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1415 Chain, Elem, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001416 DAG.getTargetConstant(Channel, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001417 }
Craig Topper48d114b2014-04-26 18:35:24 +00001418 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001419 } else {
1420 if (ValueVT == MVT::i8) {
1421 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1422 }
1423 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001424 DAG.getTargetConstant(0, DL, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001425 }
1426
1427 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001428}
1429
Tom Stellard365366f2013-01-23 02:09:06 +00001430// Returns 512 + (kc_bank << 12), or -1 for non-constant-buffer address spaces.
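// For example, CONSTANT_BUFFER_2 maps to 512 + 4096 * 2 == 8704, which is
// 512 + (2 << 12).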
1431static int
1432ConstantAddressBlock(unsigned AddressSpace) {
1433 switch (AddressSpace) {
1434 case AMDGPUAS::CONSTANT_BUFFER_0:
1435 return 512;
1436 case AMDGPUAS::CONSTANT_BUFFER_1:
1437 return 512 + 4096;
1438 case AMDGPUAS::CONSTANT_BUFFER_2:
1439 return 512 + 4096 * 2;
1440 case AMDGPUAS::CONSTANT_BUFFER_3:
1441 return 512 + 4096 * 3;
1442 case AMDGPUAS::CONSTANT_BUFFER_4:
1443 return 512 + 4096 * 4;
1444 case AMDGPUAS::CONSTANT_BUFFER_5:
1445 return 512 + 4096 * 5;
1446 case AMDGPUAS::CONSTANT_BUFFER_6:
1447 return 512 + 4096 * 6;
1448 case AMDGPUAS::CONSTANT_BUFFER_7:
1449 return 512 + 4096 * 7;
1450 case AMDGPUAS::CONSTANT_BUFFER_8:
1451 return 512 + 4096 * 8;
1452 case AMDGPUAS::CONSTANT_BUFFER_9:
1453 return 512 + 4096 * 9;
1454 case AMDGPUAS::CONSTANT_BUFFER_10:
1455 return 512 + 4096 * 10;
1456 case AMDGPUAS::CONSTANT_BUFFER_11:
1457 return 512 + 4096 * 11;
1458 case AMDGPUAS::CONSTANT_BUFFER_12:
1459 return 512 + 4096 * 12;
1460 case AMDGPUAS::CONSTANT_BUFFER_13:
1461 return 512 + 4096 * 13;
1462 case AMDGPUAS::CONSTANT_BUFFER_14:
1463 return 512 + 4096 * 14;
1464 case AMDGPUAS::CONSTANT_BUFFER_15:
1465 return 512 + 4096 * 15;
1466 default:
1467 return -1;
1468 }
1469}
1470
Matt Arsenault6dfda962016-02-10 18:21:39 +00001471SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
1472 SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001473 SDLoc DL(Op);
Matt Arsenault6dfda962016-02-10 18:21:39 +00001474 LoadSDNode *Load = cast<LoadSDNode>(Op);
1475 ISD::LoadExtType ExtType = Load->getExtensionType();
1476 EVT MemVT = Load->getMemoryVT();
Tom Stellard365366f2013-01-23 02:09:06 +00001477
Matt Arsenault6dfda962016-02-10 18:21:39 +00001478  // Pre-SI (R600) path: private-address extending loads narrower than 32 bits
1479  // are emulated with a 32-bit register load plus a (1- or 2-)byte extract.
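  // Worked example (hypothetical values): a sextload i8 from byte address 5
  // loads the dword at register index 5 >> 2 == 1, shifts it right by
  // (5 & 3) * 8 == 8 bits and then sign-extends in-register from bit 7.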
1480
1481 // Get Register holding the target.
1482 SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Load->getBasePtr(),
1483 DAG.getConstant(2, DL, MVT::i32));
1484 // Load the Register.
1485 SDValue Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(),
1486 Load->getChain(),
1487 Ptr,
1488 DAG.getTargetConstant(0, DL, MVT::i32),
1489 Op.getOperand(2));
1490
1491 // Get offset within the register.
1492 SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
1493 Load->getBasePtr(),
1494 DAG.getConstant(0x3, DL, MVT::i32));
1495
1496 // Bit offset of target byte (byteIdx * 8).
1497 SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1498 DAG.getConstant(3, DL, MVT::i32));
1499
1500 // Shift to the right.
1501 Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Ret, ShiftAmt);
1502
1503 // Eliminate the upper bits by setting them to ...
1504 EVT MemEltVT = MemVT.getScalarType();
1505
1506 // ... ones.
1507 if (ExtType == ISD::SEXTLOAD) {
1508 SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
1509
1510 SDValue Ops[] = {
1511 DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode),
1512 Load->getChain()
1513 };
1514
1515 return DAG.getMergeValues(Ops, DL);
1516 }
1517
1518 // ... or zeros.
1519 SDValue Ops[] = {
1520 DAG.getZeroExtendInReg(Ret, DL, MemEltVT),
1521 Load->getChain()
1522 };
1523
1524 return DAG.getMergeValues(Ops, DL);
1525}
1526
1527SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1528 LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
1529 unsigned AS = LoadNode->getAddressSpace();
1530 EVT MemVT = LoadNode->getMemoryVT();
1531 ISD::LoadExtType ExtType = LoadNode->getExtensionType();
1532
1533 if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
1534 ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
1535 return lowerPrivateExtLoad(Op, DAG);
1536 }
1537
1538 SDLoc DL(Op);
1539 EVT VT = Op.getValueType();
1540 SDValue Chain = LoadNode->getChain();
1541 SDValue Ptr = LoadNode->getBasePtr();
Tom Stellarde9373602014-01-22 19:24:14 +00001542
Tom Stellard067c8152014-07-21 14:01:14 +00001543  // Lower constant address space loads of global variables
1544 if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
Mehdi Aminia28d91d2015-03-10 02:37:25 +00001545 isa<GlobalVariable>(GetUnderlyingObject(
Mehdi Amini44ede332015-07-09 02:09:04 +00001546 LoadNode->getMemOperand()->getValue(), DAG.getDataLayout()))) {
Tom Stellard067c8152014-07-21 14:01:14 +00001547
Mehdi Amini44ede332015-07-09 02:09:04 +00001548 SDValue Ptr = DAG.getZExtOrTrunc(
1549 LoadNode->getBasePtr(), DL,
1550 getPointerTy(DAG.getDataLayout(), AMDGPUAS::PRIVATE_ADDRESS));
Tom Stellard067c8152014-07-21 14:01:14 +00001551 Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001552 DAG.getConstant(2, DL, MVT::i32));
Tom Stellard067c8152014-07-21 14:01:14 +00001553 return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op->getVTList(),
1554 LoadNode->getChain(), Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001555 DAG.getTargetConstant(0, DL, MVT::i32),
1556 Op.getOperand(2));
Tom Stellard067c8152014-07-21 14:01:14 +00001557 }
Tom Stellarde9373602014-01-22 19:24:14 +00001558
Tom Stellard35bb18c2013-08-26 15:06:04 +00001559 if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
1560 SDValue MergedValues[2] = {
Matt Arsenault83e60582014-07-24 17:10:35 +00001561 ScalarizeVectorLoad(Op, DAG),
Tom Stellard35bb18c2013-08-26 15:06:04 +00001562 Chain
1563 };
Craig Topper64941d92014-04-27 19:20:57 +00001564 return DAG.getMergeValues(MergedValues, DL);
Tom Stellard35bb18c2013-08-26 15:06:04 +00001565 }
1566
Tom Stellard365366f2013-01-23 02:09:06 +00001567 int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
Matt Arsenault00a0d6f2013-11-13 02:39:07 +00001568 if (ConstantBlock > -1 &&
1569 ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
1570 (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
Tom Stellard365366f2013-01-23 02:09:06 +00001571 SDValue Result;
Nick Lewyckyaad475b2014-04-15 07:22:52 +00001572 if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
1573 isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
Matt Arsenaultef1a9502013-11-01 17:39:26 +00001574 isa<ConstantSDNode>(Ptr)) {
Tom Stellard365366f2013-01-23 02:09:06 +00001575 SDValue Slots[4];
1576 for (unsigned i = 0; i < 4; i++) {
1577        // We want the constant position encoded with the following formula:
1578        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
1579        // const_index is Ptr / 16, since LLVM lays the constants out with an
1580        // alignment of 16. Thus we add 4 * chan + (512 + (kc_bank << 12)) * 16
1581        // here and then divide by 4 at the ISel step.
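        // Illustrative numbers (hypothetical): kc_bank 0, byte Ptr 32
        // (const_index 2) and chan 1 give NewPtr = 32 + 4 + 512 * 16 = 8228,
        // and 8228 / 4 = 2057 = ((512 + 2) << 2) + 1.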
1582 SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001583 DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
Tom Stellard365366f2013-01-23 02:09:06 +00001584 Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
1585 }
Tom Stellard0344cdf2013-08-01 15:23:42 +00001586 EVT NewVT = MVT::v4i32;
1587 unsigned NumElements = 4;
1588 if (VT.isVector()) {
1589 NewVT = VT;
1590 NumElements = VT.getVectorNumElements();
1591 }
Craig Topper48d114b2014-04-26 18:35:24 +00001592 Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT,
Craig Topper2d2aa0c2014-04-30 07:17:30 +00001593 makeArrayRef(Slots, NumElements));
Tom Stellard365366f2013-01-23 02:09:06 +00001594 } else {
Alp Tokerf907b892013-12-05 05:44:44 +00001595      // A non-constant ptr can't be folded, so keep it as a v4f32 load
Tom Stellard365366f2013-01-23 02:09:06 +00001596 Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001597 DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
1598 DAG.getConstant(4, DL, MVT::i32)),
1599 DAG.getConstant(LoadNode->getAddressSpace() -
1600 AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
Tom Stellard365366f2013-01-23 02:09:06 +00001601 );
1602 }
1603
1604 if (!VT.isVector()) {
1605 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001606 DAG.getConstant(0, DL, MVT::i32));
Tom Stellard365366f2013-01-23 02:09:06 +00001607 }
1608
1609 SDValue MergedValues[2] = {
Matt Arsenault7939acd2014-04-07 16:44:24 +00001610 Result,
1611 Chain
Tom Stellard365366f2013-01-23 02:09:06 +00001612 };
Craig Topper64941d92014-04-27 19:20:57 +00001613 return DAG.getMergeValues(MergedValues, DL);
Tom Stellard365366f2013-01-23 02:09:06 +00001614 }
1615
Matt Arsenault6dfda962016-02-10 18:21:39 +00001616 SDValue LoweredLoad;
1617
Matt Arsenault909d0c02013-10-30 23:43:29 +00001618 // For most operations returning SDValue() will result in the node being
1619 // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
1620 // need to manually expand loads that may be legal in some address spaces and
1621 // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
1622 // compute shaders, since the data is sign extended when it is uploaded to the
1623 // buffer. However SEXT loads from other address spaces are not supported, so
1624 // we need to expand them here.
Tom Stellard84021442013-07-23 01:48:24 +00001625 if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
1626 EVT MemVT = LoadNode->getMemoryVT();
1627 assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
Tom Stellard84021442013-07-23 01:48:24 +00001628 SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
1629 LoadNode->getPointerInfo(), MemVT,
1630 LoadNode->isVolatile(),
1631 LoadNode->isNonTemporal(),
Louis Gerbarg67474e32014-07-31 21:45:05 +00001632 LoadNode->isInvariant(),
Tom Stellard84021442013-07-23 01:48:24 +00001633 LoadNode->getAlignment());
Jan Veselyb670d372015-05-26 18:07:22 +00001634 SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
1635 DAG.getValueType(MemVT));
Tom Stellard84021442013-07-23 01:48:24 +00001636
Jan Veselyb670d372015-05-26 18:07:22 +00001637 SDValue MergedValues[2] = { Res, Chain };
Craig Topper64941d92014-04-27 19:20:57 +00001638 return DAG.getMergeValues(MergedValues, DL);
Tom Stellard84021442013-07-23 01:48:24 +00001639 }
1640
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001641 if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1642 return SDValue();
1643 }
1644
1645 // Lowering for indirect addressing
1646 const MachineFunction &MF = DAG.getMachineFunction();
Eric Christopher7792e322015-01-30 23:24:40 +00001647 const AMDGPUFrameLowering *TFL =
1648 static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001649 unsigned StackWidth = TFL->getStackWidth(MF);
1650
1651 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1652
1653 if (VT.isVector()) {
1654 unsigned NumElemVT = VT.getVectorNumElements();
1655 EVT ElemVT = VT.getVectorElementType();
1656 SDValue Loads[4];
1657
1658 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1659 "vector width in load");
1660
1661 for (unsigned i = 0; i < NumElemVT; ++i) {
1662 unsigned Channel, PtrIncr;
1663 getStackAddress(StackWidth, i, Channel, PtrIncr);
1664 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001665 DAG.getConstant(PtrIncr, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001666 Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
1667 Chain, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001668 DAG.getTargetConstant(Channel, DL, MVT::i32),
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001669 Op.getOperand(2));
1670 }
1671 for (unsigned i = NumElemVT; i < 4; ++i) {
1672 Loads[i] = DAG.getUNDEF(ElemVT);
1673 }
1674 EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
Craig Topper48d114b2014-04-26 18:35:24 +00001675 LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001676 } else {
1677 LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
1678 Chain, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001679 DAG.getTargetConstant(0, DL, MVT::i32), // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001680 Op.getOperand(2));
1681 }
1682
Matt Arsenault7939acd2014-04-07 16:44:24 +00001683 SDValue Ops[2] = {
1684 LoweredLoad,
1685 Chain
1686 };
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001687
Craig Topper64941d92014-04-27 19:20:57 +00001688 return DAG.getMergeValues(Ops, DL);
Tom Stellard365366f2013-01-23 02:09:06 +00001689}
Tom Stellard75aadc22012-12-11 21:25:42 +00001690
Matt Arsenault1d555c42014-06-23 18:00:55 +00001691SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1692 SDValue Chain = Op.getOperand(0);
1693 SDValue Cond = Op.getOperand(1);
1694 SDValue Jump = Op.getOperand(2);
1695
1696 return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1697 Chain, Jump, Cond);
1698}
1699
Tom Stellard75aadc22012-12-11 21:25:42 +00001700/// XXX Only kernel functions are supported, so we can assume for now that
1701/// every function is a kernel function, but in the future we should use
1702/// separate calling conventions for kernel and non-kernel functions.
1703SDValue R600TargetLowering::LowerFormalArguments(
1704 SDValue Chain,
1705 CallingConv::ID CallConv,
1706 bool isVarArg,
1707 const SmallVectorImpl<ISD::InputArg> &Ins,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001708 SDLoc DL, SelectionDAG &DAG,
Tom Stellard75aadc22012-12-11 21:25:42 +00001709 SmallVectorImpl<SDValue> &InVals) const {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001710 SmallVector<CCValAssign, 16> ArgLocs;
Eric Christopherb5217502014-08-06 18:45:26 +00001711 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1712 *DAG.getContext());
Vincent Lejeunef143af32013-11-11 22:10:24 +00001713 MachineFunction &MF = DAG.getMachineFunction();
Jan Veselye5121f32014-10-14 20:05:26 +00001714 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellardacfeebf2013-07-23 01:48:05 +00001715
Tom Stellardaf775432013-10-23 00:44:32 +00001716 SmallVector<ISD::InputArg, 8> LocalIns;
1717
Matt Arsenault209a7b92014-04-18 07:40:20 +00001718 getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);
Tom Stellardaf775432013-10-23 00:44:32 +00001719
1720 AnalyzeFormalArguments(CCInfo, LocalIns);
Tom Stellardacfeebf2013-07-23 01:48:05 +00001721
Tom Stellard1e803092013-07-23 01:48:18 +00001722 for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001723 CCValAssign &VA = ArgLocs[i];
Matt Arsenault74ef2772014-08-13 18:14:11 +00001724 const ISD::InputArg &In = Ins[i];
1725 EVT VT = In.VT;
1726 EVT MemVT = VA.getLocVT();
1727 if (!VT.isVector() && MemVT.isVector()) {
1728 // Get load source type if scalarized.
1729 MemVT = MemVT.getVectorElementType();
1730 }
Tom Stellard78e01292013-07-23 01:47:58 +00001731
Jan Veselye5121f32014-10-14 20:05:26 +00001732 if (MFI->getShaderType() != ShaderType::COMPUTE) {
Vincent Lejeunef143af32013-11-11 22:10:24 +00001733 unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
1734 SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
1735 InVals.push_back(Register);
1736 continue;
1737 }
1738
Tom Stellard75aadc22012-12-11 21:25:42 +00001739 PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Matt Arsenault74ef2772014-08-13 18:14:11 +00001740 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellardacfeebf2013-07-23 01:48:05 +00001741
Matt Arsenaultfae02982014-03-17 18:58:11 +00001742 // i64 isn't a legal type, so the register type used ends up as i32, which
1743 // isn't expected here. It attempts to create this sextload, but it ends up
1744 // being invalid. Somehow this seems to work with i64 arguments, but breaks
1745 // for <1 x i64>.
1746
Tom Stellardacfeebf2013-07-23 01:48:05 +00001747    // The first 36 bytes of the input buffer contain information about
1748 // thread group and global sizes.
Matt Arsenault74ef2772014-08-13 18:14:11 +00001749 ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
1750 if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
1751 // FIXME: This should really check the extload type, but the handling of
1752 // extload vector parameters seems to be broken.
Matt Arsenaulte1f030c2014-04-11 20:59:54 +00001753
Matt Arsenault74ef2772014-08-13 18:14:11 +00001754 // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
1755 Ext = ISD::SEXTLOAD;
1756 }
1757
1758 // Compute the offset from the value.
1759 // XXX - I think PartOffset should give you this, but it seems to give the
1760 // size of the register which isn't useful.
1761
Andrew Trick05938a52015-02-16 18:10:47 +00001762 unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
Matt Arsenault74ef2772014-08-13 18:14:11 +00001763 unsigned PartOffset = VA.getLocMemOffset();
Jan Veselye5121f32014-10-14 20:05:26 +00001764 unsigned Offset = 36 + VA.getLocMemOffset();
Matt Arsenault74ef2772014-08-13 18:14:11 +00001765
1766 MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
1767 SDValue Arg = DAG.getLoad(ISD::UNINDEXED, Ext, VT, DL, Chain,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001768 DAG.getConstant(Offset, DL, MVT::i32),
Matt Arsenault74ef2772014-08-13 18:14:11 +00001769 DAG.getUNDEF(MVT::i32),
1770 PtrInfo,
1771 MemVT, false, true, true, 4);
Matt Arsenault209a7b92014-04-18 07:40:20 +00001772
1773 // 4 is the preferred alignment for the CONSTANT memory space.
Tom Stellard75aadc22012-12-11 21:25:42 +00001774 InVals.push_back(Arg);
Jan Veselye5121f32014-10-14 20:05:26 +00001775 MFI->ABIArgOffset = Offset + MemVT.getStoreSize();
Tom Stellard75aadc22012-12-11 21:25:42 +00001776 }
1777 return Chain;
1778}
1779
Mehdi Amini44ede332015-07-09 02:09:04 +00001780EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1781 EVT VT) const {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001782 if (!VT.isVector())
1783 return MVT::i32;
Tom Stellard75aadc22012-12-11 21:25:42 +00001784 return VT.changeVectorElementTypeToInteger();
1785}
1786
Matt Arsenault209a7b92014-04-18 07:40:20 +00001787static SDValue CompactSwizzlableVector(
1788 SelectionDAG &DAG, SDValue VectorEntry,
1789 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001790 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1791 assert(RemapSwizzle.empty());
1792 SDValue NewBldVec[4] = {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001793 VectorEntry.getOperand(0),
1794 VectorEntry.getOperand(1),
1795 VectorEntry.getOperand(2),
1796 VectorEntry.getOperand(3)
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001797 };
1798
1799 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeunefa58a5f2013-10-13 17:56:10 +00001800 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1801      // We mask the write here to teach later passes that the ith element of this
1802      // vector is undef. Thus we can use it to reduce 128-bit register usage,
1803      // break false dependencies and additionally make the assembly easier to read.
1804 RemapSwizzle[i] = 7; // SEL_MASK_WRITE
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001805 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1806 if (C->isZero()) {
1807 RemapSwizzle[i] = 4; // SEL_0
1808 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1809 } else if (C->isExactlyValue(1.0)) {
1810 RemapSwizzle[i] = 5; // SEL_1
1811 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1812 }
1813 }
1814
1815 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1816 continue;
1817 for (unsigned j = 0; j < i; j++) {
1818 if (NewBldVec[i] == NewBldVec[j]) {
1819 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1820 RemapSwizzle[i] = j;
1821 break;
1822 }
1823 }
1824 }
1825
1826 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001827 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001828}
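// Illustrative effect (hypothetical input): build_vector %a, %a, 0.0, undef
// keeps lane 0, remaps lane 1 onto lane 0's swizzle, turns lane 2 into SEL_0
// and lane 3 into SEL_MASK_WRITE.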
1829
Benjamin Kramer193960c2013-06-11 13:32:25 +00001830static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1831 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001832 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1833 assert(RemapSwizzle.empty());
1834 SDValue NewBldVec[4] = {
1835 VectorEntry.getOperand(0),
1836 VectorEntry.getOperand(1),
1837 VectorEntry.getOperand(2),
1838 VectorEntry.getOperand(3)
1839 };
1840 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001841 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001842 RemapSwizzle[i] = i;
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001843 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1844 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1845 ->getZExtValue();
1846 if (i == Idx)
1847 isUnmovable[Idx] = true;
1848 }
1849 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001850
1851 for (unsigned i = 0; i < 4; i++) {
1852 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1853 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1854 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001855 if (isUnmovable[Idx])
1856 continue;
1857 // Swap i and Idx
1858 std::swap(NewBldVec[Idx], NewBldVec[i]);
1859 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1860 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001861 }
1862 }
1863
1864 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001865 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001866}
1867
1868
1869SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001870 SDValue Swz[4], SelectionDAG &DAG,
1871 SDLoc DL) const {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001872 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1873 // Old -> New swizzle values
1874 DenseMap<unsigned, unsigned> SwizzleRemap;
1875
1876 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1877 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001878 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001879 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001880 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001881 }
1882
1883 SwizzleRemap.clear();
1884 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1885 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001886 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001887 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001888 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001889 }
1890
1891 return BuildVector;
1892}
1893
1894
Tom Stellard75aadc22012-12-11 21:25:42 +00001895//===----------------------------------------------------------------------===//
1896// Custom DAG Optimizations
1897//===----------------------------------------------------------------------===//
1898
1899SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1900 DAGCombinerInfo &DCI) const {
1901 SelectionDAG &DAG = DCI.DAG;
1902
1903 switch (N->getOpcode()) {
Tom Stellard50122a52014-04-07 19:45:41 +00001904 default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00001905 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1906 case ISD::FP_ROUND: {
1907 SDValue Arg = N->getOperand(0);
1908 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001909 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001910 Arg.getOperand(0));
1911 }
1912 break;
1913 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001914
1915 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1916 // (i32 select_cc f32, f32, -1, 0 cc)
1917 //
1918 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1919 // this to one of the SET*_DX10 instructions.
1920 case ISD::FP_TO_SINT: {
1921 SDValue FNeg = N->getOperand(0);
1922 if (FNeg.getOpcode() != ISD::FNEG) {
1923 return SDValue();
1924 }
1925 SDValue SelectCC = FNeg.getOperand(0);
1926 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1927 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1928 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1929 !isHWTrueValue(SelectCC.getOperand(2)) ||
1930 !isHWFalseValue(SelectCC.getOperand(3))) {
1931 return SDValue();
1932 }
1933
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001934 SDLoc dl(N);
1935 return DAG.getNode(ISD::SELECT_CC, dl, N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001936 SelectCC.getOperand(0), // LHS
1937 SelectCC.getOperand(1), // RHS
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001938 DAG.getConstant(-1, dl, MVT::i32), // True
1939 DAG.getConstant(0, dl, MVT::i32), // False
Tom Stellarde06163a2013-02-07 14:02:35 +00001940 SelectCC.getOperand(4)); // CC
1941
1942 break;
1943 }
Quentin Colombete2e05482013-07-30 00:27:16 +00001944
NAKAMURA Takumi8a046432013-10-28 04:07:38 +00001945 // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1946 // => build_vector elt0, ... , NewEltIdx, ... , eltN
Quentin Colombete2e05482013-07-30 00:27:16 +00001947 case ISD::INSERT_VECTOR_ELT: {
1948 SDValue InVec = N->getOperand(0);
1949 SDValue InVal = N->getOperand(1);
1950 SDValue EltNo = N->getOperand(2);
1951 SDLoc dl(N);
1952
1953 // If the inserted element is an UNDEF, just use the input vector.
1954 if (InVal.getOpcode() == ISD::UNDEF)
1955 return InVec;
1956
1957 EVT VT = InVec.getValueType();
1958
1959 // If we can't generate a legal BUILD_VECTOR, exit
1960 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1961 return SDValue();
1962
1963 // Check that we know which element is being inserted
1964 if (!isa<ConstantSDNode>(EltNo))
1965 return SDValue();
1966 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1967
1968 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1969 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1970 // vector elements.
1971 SmallVector<SDValue, 8> Ops;
1972 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1973 Ops.append(InVec.getNode()->op_begin(),
1974 InVec.getNode()->op_end());
1975 } else if (InVec.getOpcode() == ISD::UNDEF) {
1976 unsigned NElts = VT.getVectorNumElements();
1977 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1978 } else {
1979 return SDValue();
1980 }
1981
1982 // Insert the element
1983 if (Elt < Ops.size()) {
1984 // All the operands of BUILD_VECTOR must have the same type;
1985 // we enforce that here.
1986 EVT OpVT = Ops[0].getValueType();
1987 if (InVal.getValueType() != OpVT)
1988 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1989 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
1990 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
1991 Ops[Elt] = InVal;
1992 }
1993
1994 // Return the new vector
Craig Topper48d114b2014-04-26 18:35:24 +00001995 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
Quentin Colombete2e05482013-07-30 00:27:16 +00001996 }
1997
Tom Stellard365366f2013-01-23 02:09:06 +00001998 // Extract_vec (Build_vector) generated by custom lowering
1999  // also needs to be custom combined
2000 case ISD::EXTRACT_VECTOR_ELT: {
2001 SDValue Arg = N->getOperand(0);
2002 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
2003 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2004 unsigned Element = Const->getZExtValue();
2005 return Arg->getOperand(Element);
2006 }
2007 }
Tom Stellarddd04c832013-01-31 22:11:53 +00002008 if (Arg.getOpcode() == ISD::BITCAST &&
2009 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
2010 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2011 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00002012 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00002013 Arg->getOperand(0).getOperand(Element));
2014 }
2015 }
Mehdi Aminie029eae2015-07-16 06:23:12 +00002016 break;
Tom Stellard365366f2013-01-23 02:09:06 +00002017 }
Tom Stellarde06163a2013-02-07 14:02:35 +00002018
2019 case ISD::SELECT_CC: {
Tom Stellardafa8b532014-05-09 16:42:16 +00002020 // Try common optimizations
Ahmed Bougachaf8dfb472016-02-09 22:54:12 +00002021 if (SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI))
Tom Stellardafa8b532014-05-09 16:42:16 +00002022 return Ret;
2023
Tom Stellarde06163a2013-02-07 14:02:35 +00002024 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
2025 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00002026 //
2027 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
2028 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00002029 SDValue LHS = N->getOperand(0);
2030 if (LHS.getOpcode() != ISD::SELECT_CC) {
2031 return SDValue();
2032 }
2033
2034 SDValue RHS = N->getOperand(1);
2035 SDValue True = N->getOperand(2);
2036 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00002037 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00002038
2039 if (LHS.getOperand(2).getNode() != True.getNode() ||
2040 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00002041 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00002042 return SDValue();
2043 }
2044
Tom Stellard5e524892013-03-08 15:37:11 +00002045 switch (NCC) {
2046 default: return SDValue();
2047 case ISD::SETNE: return LHS;
2048 case ISD::SETEQ: {
2049 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
2050 LHSCC = ISD::getSetCCInverse(LHSCC,
2051 LHS.getOperand(0).getValueType().isInteger());
Tom Stellardcd428182013-09-28 02:50:38 +00002052 if (DCI.isBeforeLegalizeOps() ||
2053 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
2054 return DAG.getSelectCC(SDLoc(N),
2055 LHS.getOperand(0),
2056 LHS.getOperand(1),
2057 LHS.getOperand(2),
2058 LHS.getOperand(3),
2059 LHSCC);
2060 break;
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002061 }
Tom Stellard5e524892013-03-08 15:37:11 +00002062 }
Tom Stellardcd428182013-09-28 02:50:38 +00002063 return SDValue();
Tom Stellard5e524892013-03-08 15:37:11 +00002064 }
Tom Stellardfbab8272013-08-16 01:12:11 +00002065
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002066 case AMDGPUISD::EXPORT: {
2067 SDValue Arg = N->getOperand(1);
2068 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2069 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002070
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002071 SDValue NewArgs[8] = {
2072 N->getOperand(0), // Chain
2073 SDValue(),
2074 N->getOperand(2), // ArrayBase
2075 N->getOperand(3), // Type
2076 N->getOperand(4), // SWZ_X
2077 N->getOperand(5), // SWZ_Y
2078 N->getOperand(6), // SWZ_Z
2079 N->getOperand(7) // SWZ_W
2080 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00002081 SDLoc DL(N);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002082 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
Craig Topper48d114b2014-04-26 18:35:24 +00002083 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
Tom Stellarde06163a2013-02-07 14:02:35 +00002084 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002085 case AMDGPUISD::TEXTURE_FETCH: {
2086 SDValue Arg = N->getOperand(1);
2087 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2088 break;
2089
2090 SDValue NewArgs[19] = {
2091 N->getOperand(0),
2092 N->getOperand(1),
2093 N->getOperand(2),
2094 N->getOperand(3),
2095 N->getOperand(4),
2096 N->getOperand(5),
2097 N->getOperand(6),
2098 N->getOperand(7),
2099 N->getOperand(8),
2100 N->getOperand(9),
2101 N->getOperand(10),
2102 N->getOperand(11),
2103 N->getOperand(12),
2104 N->getOperand(13),
2105 N->getOperand(14),
2106 N->getOperand(15),
2107 N->getOperand(16),
2108 N->getOperand(17),
2109 N->getOperand(18),
2110 };
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002111 SDLoc DL(N);
2112 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
2113 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002114 }
Tom Stellard75aadc22012-12-11 21:25:42 +00002115 }
Matt Arsenault5565f65e2014-05-22 18:09:07 +00002116
2117 return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00002118}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002119
2120static bool
2121FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002122 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Eric Christopherfc6de422014-08-05 02:39:49 +00002123 const R600InstrInfo *TII =
2124 static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002125 if (!Src.isMachineOpcode())
2126 return false;
2127 switch (Src.getMachineOpcode()) {
2128 case AMDGPU::FNEG_R600:
2129 if (!Neg.getNode())
2130 return false;
2131 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002132 Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002133 return true;
2134 case AMDGPU::FABS_R600:
2135 if (!Abs.getNode())
2136 return false;
2137 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002138 Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002139 return true;
2140 case AMDGPU::CONST_COPY: {
2141 unsigned Opcode = ParentNode->getMachineOpcode();
2142 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2143
2144 if (!Sel.getNode())
2145 return false;
2146
2147 SDValue CstOffset = Src.getOperand(0);
2148 if (ParentNode->getValueType(0).isVector())
2149 return false;
2150
2151    // Gather constant values
2152 int SrcIndices[] = {
2153 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2154 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2155 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
2156 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2157 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2158 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2159 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2160 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2161 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2162 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2163 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
2164 };
2165 std::vector<unsigned> Consts;
Matt Arsenault4d64f962014-05-12 19:23:21 +00002166 for (int OtherSrcIdx : SrcIndices) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002167 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2168 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2169 continue;
2170 if (HasDst) {
2171 OtherSrcIdx--;
2172 OtherSelIdx--;
2173 }
2174 if (RegisterSDNode *Reg =
2175 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2176 if (Reg->getReg() == AMDGPU::ALU_CONST) {
Matt Arsenaultb3ee3882014-05-12 19:26:38 +00002177 ConstantSDNode *Cst
2178 = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002179 Consts.push_back(Cst->getZExtValue());
2180 }
2181 }
2182 }
2183
Matt Arsenault37c12d72014-05-12 20:42:57 +00002184 ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002185 Consts.push_back(Cst->getZExtValue());
2186 if (!TII->fitsConstReadLimitations(Consts)) {
2187 return false;
2188 }
2189
2190 Sel = CstOffset;
2191 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
2192 return true;
2193 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002194 case AMDGPU::MOV_IMM_I32:
2195 case AMDGPU::MOV_IMM_F32: {
2196 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
2197 uint64_t ImmValue = 0;
2198
2199
2200 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
2201 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
2202 float FloatValue = FPC->getValueAPF().convertToFloat();
2203 if (FloatValue == 0.0) {
2204 ImmReg = AMDGPU::ZERO;
2205 } else if (FloatValue == 0.5) {
2206 ImmReg = AMDGPU::HALF;
2207 } else if (FloatValue == 1.0) {
2208 ImmReg = AMDGPU::ONE;
2209 } else {
2210 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2211 }
2212 } else {
2213 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
2214 uint64_t Value = C->getZExtValue();
2215 if (Value == 0) {
2216 ImmReg = AMDGPU::ZERO;
2217 } else if (Value == 1) {
2218 ImmReg = AMDGPU::ONE_INT;
2219 } else {
2220 ImmValue = Value;
2221 }
2222 }
2223
2224 // Check that we aren't already using an immediate.
2225 // XXX: It's possible for an instruction to have more than one
2226 // immediate operand, but this is not supported yet.
2227 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
2228 if (!Imm.getNode())
2229 return false;
2230 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
2231 assert(C);
2232 if (C->getZExtValue())
2233 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002234 Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002235 }
2236 Src = DAG.getRegister(ImmReg, MVT::i32);
2237 return true;
2238 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002239 default:
2240 return false;
2241 }
2242}
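// Illustrative example: a source produced by FNEG_R600 is replaced by its
// operand and the matching src*_neg modifier is set to 1, so the negation is
// folded into the consuming instruction.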
2243
2244
2245/// \brief Fold the instructions after selecting them
2246SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2247 SelectionDAG &DAG) const {
Eric Christopherfc6de422014-08-05 02:39:49 +00002248 const R600InstrInfo *TII =
2249 static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002250 if (!Node->isMachineOpcode())
2251 return Node;
2252 unsigned Opcode = Node->getMachineOpcode();
2253 SDValue FakeOp;
2254
Benjamin Kramer6cd780f2015-02-17 15:29:18 +00002255 std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002256
2257 if (Opcode == AMDGPU::DOT_4) {
2258 int OperandIdx[] = {
2259 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2260 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2261 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2262 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2263 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2264 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2265 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2266 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00002267 };
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002268 int NegIdx[] = {
2269 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
2270 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
2271 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
2272 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
2273 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
2274 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
2275 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
2276 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
2277 };
2278 int AbsIdx[] = {
2279 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
2280 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
2281 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
2282 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
2283 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
2284 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
2285 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
2286 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
2287 };
2288 for (unsigned i = 0; i < 8; i++) {
2289 if (OperandIdx[i] < 0)
2290 return Node;
2291 SDValue &Src = Ops[OperandIdx[i] - 1];
2292 SDValue &Neg = Ops[NegIdx[i] - 1];
2293 SDValue &Abs = Ops[AbsIdx[i] - 1];
2294 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2295 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2296 if (HasDst)
2297 SelIdx--;
2298 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002299 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2300 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2301 }
2302 } else if (Opcode == AMDGPU::REG_SEQUENCE) {
2303 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2304 SDValue &Src = Ops[i];
2305 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002306 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2307 }
Vincent Lejeune0167a312013-09-12 23:45:00 +00002308 } else if (Opcode == AMDGPU::CLAMP_R600) {
2309 SDValue Src = Node->getOperand(0);
2310 if (!Src.isMachineOpcode() ||
2311 !TII->hasInstrModifiers(Src.getMachineOpcode()))
2312 return Node;
2313 int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
2314 AMDGPU::OpName::clamp);
2315 if (ClampIdx < 0)
2316 return Node;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002317 SDLoc DL(Node);
Benjamin Kramer6cd780f2015-02-17 15:29:18 +00002318 std::vector<SDValue> Ops(Src->op_begin(), Src->op_end());
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002319 Ops[ClampIdx - 1] = DAG.getTargetConstant(1, DL, MVT::i32);
2320 return DAG.getMachineNode(Src.getMachineOpcode(), DL,
2321 Node->getVTList(), Ops);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002322 } else {
2323 if (!TII->hasInstrModifiers(Opcode))
2324 return Node;
2325 int OperandIdx[] = {
2326 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2327 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2328 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
2329 };
2330 int NegIdx[] = {
2331 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
2332 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
2333 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
2334 };
2335 int AbsIdx[] = {
2336 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
2337 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
2338 -1
2339 };
2340 for (unsigned i = 0; i < 3; i++) {
2341 if (OperandIdx[i] < 0)
2342 return Node;
2343 SDValue &Src = Ops[OperandIdx[i] - 1];
2344 SDValue &Neg = Ops[NegIdx[i] - 1];
2345 SDValue FakeAbs;
2346 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2347 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2348 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002349 int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
2350 if (HasDst) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002351 SelIdx--;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002352 ImmIdx--;
2353 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002354 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002355 SDValue &Imm = Ops[ImmIdx];
2356 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002357 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2358 }
2359 }
2360
2361 return Node;
2362}