blob: b249a9d13a9962c495ddcf731d05401f1fd1a363 [file] [log] [blame]
Tom Stellard75aadc22012-12-11 21:25:42 +00001//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Custom DAG lowering for R600
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000016#include "AMDGPUFrameLowering.h"
Matt Arsenaultc791f392014-06-23 18:00:31 +000017#include "AMDGPUIntrinsicInfo.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000018#include "AMDGPUSubtarget.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000019#include "R600Defines.h"
20#include "R600InstrInfo.h"
21#include "R600MachineFunctionInfo.h"
Tom Stellard067c8152014-07-21 14:01:14 +000022#include "llvm/Analysis/ValueTracking.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000023#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000024#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000025#include "llvm/CodeGen/MachineInstrBuilder.h"
26#include "llvm/CodeGen/MachineRegisterInfo.h"
27#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000028#include "llvm/IR/Argument.h"
29#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000030
31using namespace llvm;
32
/// Constructor: registers the register classes the R600 backend can allocate
/// and tells the generic SelectionDAG legalizer, per (opcode, type) pair,
/// whether an operation is Legal, must be Expanded into simpler nodes, or is
/// Custom-lowered by this class (see LowerOperation).
R600TargetLowering::R600TargetLowering(TargetMachine &TM,
                                       const AMDGPUSubtarget &STI)
    : AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) {
  // 32-bit scalars live in Reg32; 2-element vectors in Reg64; 4-element
  // vectors in Reg128 (one register per channel group).
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
  addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);

  computeRegisterProperties(STI.getRegisterInfo());

  // Set condition code actions: condition codes the hardware cannot test
  // directly are expanded into supported ones by the legalizer.
  setCondCodeAction(ISD::SETO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::f32, Expand);

  setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::i32, Expand);

  // Trig is custom-lowered (see LowerTrig via LowerOperation).
  setOperationAction(ISD::FCOS, MVT::f32, Custom);
  setOperationAction(ISD::FSIN, MVT::f32, Custom);

  setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
  setOperationAction(ISD::SETCC, MVT::v2i32, Expand);

  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  // Plain SELECT is expanded (into SELECT_CC, which is custom above).
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
  setOperationAction(ISD::SELECT, MVT::v4i32, Expand);

  // ADD, SUB overflow.
  // TODO: turn these into Legal?
  if (Subtarget->hasCARRY())
    setOperationAction(ISD::UADDO, MVT::i32, Custom);

  if (Subtarget->hasBORROW())
    setOperationAction(ISD::USUBO, MVT::i32, Custom);

  // Expand sign extension of vectors
  // NOTE: each !hasBFE() guard below covers only the single scalar case that
  // follows it; the vector cases are expanded unconditionally.
  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);


  // Legalize loads and stores to the private address space.
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);

  // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
  // spaces, so it is custom lowered to handle those where it isn't.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
  }

  setOperationAction(ISD::STORE, MVT::i8, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);
  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
  setTruncStoreAction(MVT::i32, MVT::i16, Custom);

  // NOTE(review): the i32/v4i32 LOAD actions are set a second time here
  // (same values as above) — harmless but redundant.
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);

  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);

  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

  // Register DAG-combine hooks (handled in PerformDAGCombine).
  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::FP_TO_SINT);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::SELECT_CC);
  setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);

  // These should be replaced by UDVIREM, but it does not happen automatically
  // during Type Legalization
  setOperationAction(ISD::UDIV, MVT::i64, Custom);
  setOperationAction(ISD::UREM, MVT::i64, Custom);
  setOperationAction(ISD::SDIV, MVT::i64, Custom);
  setOperationAction(ISD::SREM, MVT::i64, Custom);

  // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
  // to be Legal/Custom in order to avoid library calls.
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);

  // No carry/borrow-chained add/sub nodes; expand them for both scalar widths.
  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
  for (MVT VT : ScalarIntVTs) {
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::SUBE, VT, Expand);
  }

  setSchedulingPreference(Sched::Source);
}
199
/// Expand pseudo-instructions that were marked usesCustomInserter into real
/// machine instructions after instruction selection.
///
/// \param MI the pseudo-instruction to expand.
/// \param BB the block currently containing \p MI.
/// \return the block in which subsequent instructions should be inserted
///         (always \p BB here).
///
/// On all paths except the early `return BB` cases, the original \p MI is
/// erased at the bottom of the function after its replacement is built.
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(Subtarget->getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // Replace LDS_*_RET instruction that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI->getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
      // LDS_1A2D support and remove this special case.
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()) ||
          MI->getOpcode() == AMDGPU::LDS_CMPST_RET)
        return BB; // dst is used (or 1A2D op): keep the RET form unchanged.

      // Rebuild as the NORET form, copying every operand except dst (idx 0).
      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      // Not an LDS op: defer to the generic AMDGPU inserter.
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;

  // CLAMP/FABS/FNEG pseudos become a MOV carrying the matching modifier flag.
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    // Emits no instruction of its own: flags the defining instruction of the
    // masked register so its write is masked out.
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    // Materialize the FP immediate via its raw bit pattern.
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;

  case AMDGPU::CONST_COPY: {
    // MOV from the constant file; the constant slot is selected through the
    // src0_sel immediate operand.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
        MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    // EOP is set when the write is immediately followed by RETURN.
    unsigned EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    // Texture sample with derivatives: load H and V gradients into two temp
    // Reg128s, then issue TEX_SAMPLE_G reading them implicitly.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    // Adjust source swizzle and coordinate-type bits per texture target.
    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    // Same expansion as TXD, but sampling with the shadow-compare opcode
    // TEX_SAMPLE_C_G.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    // Unconditional branch: plain JUMP to the target block.
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    // f32 condition: PRED_X sets PREDICATE_BIT when the operand is non-zero,
    // then JUMP_COND consumes (and kills) that bit.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    // Same as the f32 variant but with the integer non-zero comparison.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO_INT)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if its not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    // Scan the rest of the block for a later export of the same type.
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB; // nothing to finalize; keep the pseudo as-is.
    // CF instruction encoding differs between Evergreen (84) and R600 (40).
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB; // RETURN itself is kept (not erased below).
  }
  }

  // The pseudo has been fully replaced; remove it.
  MI->eraseFromParent();
  return BB;
}
579
580//===----------------------------------------------------------------------===//
581// Custom DAG Lowering Operations
582//===----------------------------------------------------------------------===//
583
Tom Stellard75aadc22012-12-11 21:25:42 +0000584SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000585 MachineFunction &MF = DAG.getMachineFunction();
586 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard75aadc22012-12-11 21:25:42 +0000587 switch (Op.getOpcode()) {
588 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Tom Stellard880a80a2014-06-17 16:53:14 +0000589 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
590 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
Jan Vesely25f36272014-06-18 12:27:13 +0000591 case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
Jan Veselyecf51332014-06-18 12:27:17 +0000592 case ISD::SRA_PARTS:
Jan Vesely900ff2e2014-06-18 12:27:15 +0000593 case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
Jan Vesely808fff52015-04-30 17:15:56 +0000594 case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
595 case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000596 case ISD::FCOS:
597 case ISD::FSIN: return LowerTrig(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000598 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000599 case ISD::STORE: return LowerSTORE(Op, DAG);
Matt Arsenaultd2c9e082014-07-07 18:34:45 +0000600 case ISD::LOAD: {
601 SDValue Result = LowerLOAD(Op, DAG);
602 assert((!Result.getNode() ||
603 Result.getNode()->getNumValues() == 2) &&
604 "Load should return a value and a chain");
605 return Result;
606 }
607
Matt Arsenault1d555c42014-06-23 18:00:55 +0000608 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000609 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000610 case ISD::INTRINSIC_VOID: {
611 SDValue Chain = Op.getOperand(0);
612 unsigned IntrinsicID =
613 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
614 switch (IntrinsicID) {
615 case AMDGPUIntrinsic::AMDGPU_store_output: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000616 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
617 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000618 MFI->LiveOuts.push_back(Reg);
Andrew Trickef9de2a2013-05-25 02:42:55 +0000619 return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
Tom Stellard75aadc22012-12-11 21:25:42 +0000620 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000621 case AMDGPUIntrinsic::R600_store_swizzle: {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000622 SDLoc DL(Op);
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000623 const SDValue Args[8] = {
624 Chain,
625 Op.getOperand(2), // Export Value
626 Op.getOperand(3), // ArrayBase
627 Op.getOperand(4), // Type
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000628 DAG.getConstant(0, DL, MVT::i32), // SWZ_X
629 DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
630 DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
631 DAG.getConstant(3, DL, MVT::i32) // SWZ_W
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000632 };
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000633 return DAG.getNode(AMDGPUISD::EXPORT, DL, Op.getValueType(), Args);
Tom Stellard75aadc22012-12-11 21:25:42 +0000634 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000635
Tom Stellard75aadc22012-12-11 21:25:42 +0000636 // default for switch(IntrinsicID)
637 default: break;
638 }
639 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
640 break;
641 }
642 case ISD::INTRINSIC_WO_CHAIN: {
643 unsigned IntrinsicID =
644 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
645 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000646 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000647 switch(IntrinsicID) {
648 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000649 case AMDGPUIntrinsic::R600_load_input: {
650 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
651 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
652 MachineFunction &MF = DAG.getMachineFunction();
653 MachineRegisterInfo &MRI = MF.getRegInfo();
654 MRI.addLiveIn(Reg);
655 return DAG.getCopyFromReg(DAG.getEntryNode(),
656 SDLoc(DAG.getEntryNode()), Reg, VT);
657 }
658
659 case AMDGPUIntrinsic::R600_interp_input: {
660 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
661 int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
662 MachineSDNode *interp;
663 if (ijb < 0) {
Eric Christopher7792e322015-01-30 23:24:40 +0000664 const R600InstrInfo *TII =
665 static_cast<const R600InstrInfo *>(Subtarget->getInstrInfo());
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000666 interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000667 MVT::v4f32, DAG.getTargetConstant(slot / 4, DL, MVT::i32));
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000668 return DAG.getTargetExtractSubreg(
669 TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
670 DL, MVT::f32, SDValue(interp, 0));
671 }
672 MachineFunction &MF = DAG.getMachineFunction();
673 MachineRegisterInfo &MRI = MF.getRegInfo();
674 unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
675 unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
676 MRI.addLiveIn(RegisterI);
677 MRI.addLiveIn(RegisterJ);
678 SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
679 SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
680 SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
681 SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);
682
683 if (slot % 4 < 2)
684 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000685 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4, DL, MVT::i32),
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000686 RegisterJNode, RegisterINode);
687 else
688 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000689 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4, DL, MVT::i32),
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000690 RegisterJNode, RegisterINode);
691 return SDValue(interp, slot % 2);
692 }
Vincent Lejeunef143af32013-11-11 22:10:24 +0000693 case AMDGPUIntrinsic::R600_interp_xy:
694 case AMDGPUIntrinsic::R600_interp_zw: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000695 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Tom Stellard41afe6a2013-02-05 17:09:14 +0000696 MachineSDNode *interp;
Vincent Lejeunef143af32013-11-11 22:10:24 +0000697 SDValue RegisterINode = Op.getOperand(2);
698 SDValue RegisterJNode = Op.getOperand(3);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000699
Vincent Lejeunef143af32013-11-11 22:10:24 +0000700 if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
Tom Stellard41afe6a2013-02-05 17:09:14 +0000701 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000702 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000703 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000704 else
705 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000706 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000707 RegisterJNode, RegisterINode);
Vincent Lejeunef143af32013-11-11 22:10:24 +0000708 return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
709 SDValue(interp, 0), SDValue(interp, 1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000710 }
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000711 case AMDGPUIntrinsic::R600_tex:
712 case AMDGPUIntrinsic::R600_texc:
713 case AMDGPUIntrinsic::R600_txl:
714 case AMDGPUIntrinsic::R600_txlc:
715 case AMDGPUIntrinsic::R600_txb:
716 case AMDGPUIntrinsic::R600_txbc:
717 case AMDGPUIntrinsic::R600_txf:
718 case AMDGPUIntrinsic::R600_txq:
719 case AMDGPUIntrinsic::R600_ddx:
Vincent Lejeune6df39432013-10-02 16:00:33 +0000720 case AMDGPUIntrinsic::R600_ddy:
721 case AMDGPUIntrinsic::R600_ldptr: {
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000722 unsigned TextureOp;
723 switch (IntrinsicID) {
724 case AMDGPUIntrinsic::R600_tex:
725 TextureOp = 0;
726 break;
727 case AMDGPUIntrinsic::R600_texc:
728 TextureOp = 1;
729 break;
730 case AMDGPUIntrinsic::R600_txl:
731 TextureOp = 2;
732 break;
733 case AMDGPUIntrinsic::R600_txlc:
734 TextureOp = 3;
735 break;
736 case AMDGPUIntrinsic::R600_txb:
737 TextureOp = 4;
738 break;
739 case AMDGPUIntrinsic::R600_txbc:
740 TextureOp = 5;
741 break;
742 case AMDGPUIntrinsic::R600_txf:
743 TextureOp = 6;
744 break;
745 case AMDGPUIntrinsic::R600_txq:
746 TextureOp = 7;
747 break;
748 case AMDGPUIntrinsic::R600_ddx:
749 TextureOp = 8;
750 break;
751 case AMDGPUIntrinsic::R600_ddy:
752 TextureOp = 9;
753 break;
Vincent Lejeune6df39432013-10-02 16:00:33 +0000754 case AMDGPUIntrinsic::R600_ldptr:
755 TextureOp = 10;
756 break;
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000757 default:
758 llvm_unreachable("Unknow Texture Operation");
759 }
760
761 SDValue TexArgs[19] = {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000762 DAG.getConstant(TextureOp, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000763 Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000764 DAG.getConstant(0, DL, MVT::i32),
765 DAG.getConstant(1, DL, MVT::i32),
766 DAG.getConstant(2, DL, MVT::i32),
767 DAG.getConstant(3, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000768 Op.getOperand(2),
769 Op.getOperand(3),
770 Op.getOperand(4),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000771 DAG.getConstant(0, DL, MVT::i32),
772 DAG.getConstant(1, DL, MVT::i32),
773 DAG.getConstant(2, DL, MVT::i32),
774 DAG.getConstant(3, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000775 Op.getOperand(5),
776 Op.getOperand(6),
777 Op.getOperand(7),
778 Op.getOperand(8),
779 Op.getOperand(9),
780 Op.getOperand(10)
781 };
Craig Topper48d114b2014-04-26 18:35:24 +0000782 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000783 }
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000784 case AMDGPUIntrinsic::AMDGPU_dp4: {
785 SDValue Args[8] = {
786 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000787 DAG.getConstant(0, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000788 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000789 DAG.getConstant(0, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000790 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000791 DAG.getConstant(1, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000792 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000793 DAG.getConstant(1, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000794 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000795 DAG.getConstant(2, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000796 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000797 DAG.getConstant(2, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000798 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000799 DAG.getConstant(3, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000800 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000801 DAG.getConstant(3, DL, MVT::i32))
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000802 };
Craig Topper48d114b2014-04-26 18:35:24 +0000803 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000804 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000805
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000806 case Intrinsic::r600_read_ngroups_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000807 return LowerImplicitParameter(DAG, VT, DL, 0);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000808 case Intrinsic::r600_read_ngroups_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000809 return LowerImplicitParameter(DAG, VT, DL, 1);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000810 case Intrinsic::r600_read_ngroups_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000811 return LowerImplicitParameter(DAG, VT, DL, 2);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000812 case Intrinsic::r600_read_global_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000813 return LowerImplicitParameter(DAG, VT, DL, 3);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000814 case Intrinsic::r600_read_global_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000815 return LowerImplicitParameter(DAG, VT, DL, 4);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000816 case Intrinsic::r600_read_global_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000817 return LowerImplicitParameter(DAG, VT, DL, 5);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000818 case Intrinsic::r600_read_local_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000819 return LowerImplicitParameter(DAG, VT, DL, 6);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000820 case Intrinsic::r600_read_local_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000821 return LowerImplicitParameter(DAG, VT, DL, 7);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000822 case Intrinsic::r600_read_local_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000823 return LowerImplicitParameter(DAG, VT, DL, 8);
824
Jan Veselye5121f32014-10-14 20:05:26 +0000825 case Intrinsic::AMDGPU_read_workdim:
826 return LowerImplicitParameter(DAG, VT, DL, MFI->ABIArgOffset / 4);
827
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000828 case Intrinsic::r600_read_tgid_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000829 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
830 AMDGPU::T1_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000831 case Intrinsic::r600_read_tgid_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000832 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
833 AMDGPU::T1_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000834 case Intrinsic::r600_read_tgid_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000835 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
836 AMDGPU::T1_Z, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000837 case Intrinsic::r600_read_tidig_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000838 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
839 AMDGPU::T0_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000840 case Intrinsic::r600_read_tidig_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000841 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
842 AMDGPU::T0_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000843 case Intrinsic::r600_read_tidig_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000844 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
845 AMDGPU::T0_Z, VT);
Matt Arsenault257d48d2014-06-24 22:13:39 +0000846 case Intrinsic::AMDGPU_rsq:
847 // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
848 return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
Marek Olsak43650e42015-03-24 13:40:08 +0000849
850 case AMDGPUIntrinsic::AMDGPU_fract:
851 case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
852 return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000853 }
854 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
855 break;
856 }
857 } // end switch(Op.getOpcode())
858 return SDValue();
859}
860
/// Replace the results of an illegal-typed node with legal equivalents.
/// Handles fp-to-int conversions and expands integer division/remainder
/// nodes into the combined {U,S}DIVREM forms this target lowers natively.
void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    // Anything not handled here is delegated to the common AMDGPU lowering.
    AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
    return;
  case ISD::FP_TO_UINT:
    // An i1 result reduces to a "!= 0.0" comparison; see LowerFPTOUINT.
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
      return;
    }
    // Fall-through. Since we don't care about out of bounds values
    // we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint
    // considers some extra cases which are not necessary here.
  case ISD::FP_TO_SINT: {
    SDValue Result;
    if (expandFP_TO_SINT(N, Result, DAG))
      Results.push_back(Result);
    return;
  }
  // The four div/rem cases below all expand to a combined divide-remainder
  // node and then pick the desired result: value 0 is the quotient, value 1
  // is the remainder.
  case ISD::UDIV: {
    SDValue Op = SDValue(N, 0);
    SDLoc DL(Op);
    EVT VT = Op.getValueType();
    SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT),
      N->getOperand(0), N->getOperand(1));
    Results.push_back(UDIVREM);
    break;
  }
  case ISD::UREM: {
    SDValue Op = SDValue(N, 0);
    SDLoc DL(Op);
    EVT VT = Op.getValueType();
    SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT),
      N->getOperand(0), N->getOperand(1));
    Results.push_back(UDIVREM.getValue(1));
    break;
  }
  case ISD::SDIV: {
    SDValue Op = SDValue(N, 0);
    SDLoc DL(Op);
    EVT VT = Op.getValueType();
    SDValue SDIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(VT, VT),
      N->getOperand(0), N->getOperand(1));
    Results.push_back(SDIVREM);
    break;
  }
  case ISD::SREM: {
    SDValue Op = SDValue(N, 0);
    SDLoc DL(Op);
    EVT VT = Op.getValueType();
    SDValue SDIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(VT, VT),
      N->getOperand(0), N->getOperand(1));
    Results.push_back(SDIVREM.getValue(1));
    break;
  }
  case ISD::SDIVREM: {
    SDValue Op = SDValue(N, 1);
    SDValue RES = LowerSDIVREM(Op, DAG);
    // LowerSDIVREM returns both quotient and remainder.
    Results.push_back(RES);
    Results.push_back(RES.getValue(1));
    break;
  }
  case ISD::UDIVREM: {
    SDValue Op = SDValue(N, 0);
    // LowerUDIVREM64 appends its results directly.
    LowerUDIVREM64(Op, DAG, Results);
    break;
  }
  }
}
932
Tom Stellard880a80a2014-06-17 16:53:14 +0000933SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
934 SDValue Vector) const {
935
936 SDLoc DL(Vector);
937 EVT VecVT = Vector.getValueType();
938 EVT EltVT = VecVT.getVectorElementType();
939 SmallVector<SDValue, 8> Args;
940
941 for (unsigned i = 0, e = VecVT.getVectorNumElements();
942 i != e; ++i) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000943 Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
944 DAG.getConstant(i, DL, getVectorIdxTy())));
Tom Stellard880a80a2014-06-17 16:53:14 +0000945 }
946
947 return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
948}
949
950SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
951 SelectionDAG &DAG) const {
952
953 SDLoc DL(Op);
954 SDValue Vector = Op.getOperand(0);
955 SDValue Index = Op.getOperand(1);
956
957 if (isa<ConstantSDNode>(Index) ||
958 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
959 return Op;
960
961 Vector = vectorToVerticalVector(DAG, Vector);
962 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
963 Vector, Index);
964}
965
966SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
967 SelectionDAG &DAG) const {
968 SDLoc DL(Op);
969 SDValue Vector = Op.getOperand(0);
970 SDValue Value = Op.getOperand(1);
971 SDValue Index = Op.getOperand(2);
972
973 if (isa<ConstantSDNode>(Index) ||
974 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
975 return Op;
976
977 Vector = vectorToVerticalVector(DAG, Vector);
978 SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
979 Vector, Value, Index);
980 return vectorToVerticalVector(DAG, Insert);
981}
982
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000983SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
984 // On hw >= R700, COS/SIN input must be between -1. and 1.
985 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
986 EVT VT = Op.getValueType();
987 SDValue Arg = Op.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000988 SDLoc DL(Op);
989 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
990 DAG.getNode(ISD::FADD, DL, VT,
991 DAG.getNode(ISD::FMUL, DL, VT, Arg,
992 DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
993 DAG.getConstantFP(0.5, DL, MVT::f32)));
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000994 unsigned TrigNode;
995 switch (Op.getOpcode()) {
996 case ISD::FCOS:
997 TrigNode = AMDGPUISD::COS_HW;
998 break;
999 case ISD::FSIN:
1000 TrigNode = AMDGPUISD::SIN_HW;
1001 break;
1002 default:
1003 llvm_unreachable("Wrong trig opcode");
1004 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001005 SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
1006 DAG.getNode(ISD::FADD, DL, VT, FractPart,
1007 DAG.getConstantFP(-0.5, DL, MVT::f32)));
Vincent Lejeuneb55940c2013-07-09 15:03:11 +00001008 if (Gen >= AMDGPUSubtarget::R700)
1009 return TrigVal;
1010 // On R600 hw, COS/SIN input must be between -Pi and Pi.
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001011 return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
1012 DAG.getConstantFP(3.14159265359, DL, MVT::f32));
Vincent Lejeuneb55940c2013-07-09 15:03:11 +00001013}
1014
Jan Vesely25f36272014-06-18 12:27:13 +00001015SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
1016 SDLoc DL(Op);
1017 EVT VT = Op.getValueType();
1018
1019 SDValue Lo = Op.getOperand(0);
1020 SDValue Hi = Op.getOperand(1);
1021 SDValue Shift = Op.getOperand(2);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001022 SDValue Zero = DAG.getConstant(0, DL, VT);
1023 SDValue One = DAG.getConstant(1, DL, VT);
Jan Vesely25f36272014-06-18 12:27:13 +00001024
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001025 SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
1026 SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
Jan Vesely25f36272014-06-18 12:27:13 +00001027 SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
1028 SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
1029
1030 // The dance around Width1 is necessary for 0 special case.
1031 // Without it the CompShift might be 32, producing incorrect results in
1032 // Overflow. So we do the shift in two steps, the alternative is to
1033 // add a conditional to filter the special case.
1034
1035 SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
1036 Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);
1037
1038 SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
1039 HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
1040 SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);
1041
1042 SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
1043 SDValue LoBig = Zero;
1044
1045 Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
1046 Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
1047
1048 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
1049}
1050
Jan Vesely900ff2e2014-06-18 12:27:15 +00001051SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
1052 SDLoc DL(Op);
1053 EVT VT = Op.getValueType();
1054
1055 SDValue Lo = Op.getOperand(0);
1056 SDValue Hi = Op.getOperand(1);
1057 SDValue Shift = Op.getOperand(2);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001058 SDValue Zero = DAG.getConstant(0, DL, VT);
1059 SDValue One = DAG.getConstant(1, DL, VT);
Jan Vesely900ff2e2014-06-18 12:27:15 +00001060
Jan Veselyecf51332014-06-18 12:27:17 +00001061 const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;
1062
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001063 SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
1064 SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
Jan Vesely900ff2e2014-06-18 12:27:15 +00001065 SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
1066 SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
1067
1068 // The dance around Width1 is necessary for 0 special case.
1069 // Without it the CompShift might be 32, producing incorrect results in
1070 // Overflow. So we do the shift in two steps, the alternative is to
1071 // add a conditional to filter the special case.
1072
1073 SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
1074 Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);
1075
Jan Veselyecf51332014-06-18 12:27:17 +00001076 SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
Jan Vesely900ff2e2014-06-18 12:27:15 +00001077 SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
1078 LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);
1079
Jan Veselyecf51332014-06-18 12:27:17 +00001080 SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
1081 SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;
Jan Vesely900ff2e2014-06-18 12:27:15 +00001082
1083 Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
1084 Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
1085
1086 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
1087}
1088
Jan Vesely808fff52015-04-30 17:15:56 +00001089SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
1090 unsigned mainop, unsigned ovf) const {
1091 SDLoc DL(Op);
1092 EVT VT = Op.getValueType();
1093
1094 SDValue Lo = Op.getOperand(0);
1095 SDValue Hi = Op.getOperand(1);
1096
1097 SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
1098 // Extend sign.
1099 OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
1100 DAG.getValueType(MVT::i1));
1101
1102 SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);
1103
1104 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
1105}
1106
Tom Stellard75aadc22012-12-11 21:25:42 +00001107SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001108 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001109 return DAG.getNode(
1110 ISD::SETCC,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001111 DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001112 MVT::i1,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001113 Op, DAG.getConstantFP(0.0f, DL, MVT::f32),
Tom Stellard75aadc22012-12-11 21:25:42 +00001114 DAG.getCondCode(ISD::SETNE)
1115 );
1116}
1117
Tom Stellard75aadc22012-12-11 21:25:42 +00001118SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001119 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001120 unsigned DwordOffset) const {
1121 unsigned ByteOffset = DwordOffset * 4;
1122 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +00001123 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +00001124
1125 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
1126 assert(isInt<16>(ByteOffset));
1127
1128 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001129 DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
Tom Stellard75aadc22012-12-11 21:25:42 +00001130 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
1131 false, false, false, 0);
1132}
1133
Tom Stellard75aadc22012-12-11 21:25:42 +00001134bool R600TargetLowering::isZero(SDValue Op) const {
1135 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
1136 return Cst->isNullValue();
1137 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
1138 return CstFP->isZero();
1139 } else {
1140 return false;
1141 }
1142}
1143
1144SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001145 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001146 EVT VT = Op.getValueType();
1147
1148 SDValue LHS = Op.getOperand(0);
1149 SDValue RHS = Op.getOperand(1);
1150 SDValue True = Op.getOperand(2);
1151 SDValue False = Op.getOperand(3);
1152 SDValue CC = Op.getOperand(4);
1153 SDValue Temp;
1154
Matt Arsenault1e3a4eb2014-12-12 02:30:37 +00001155 if (VT == MVT::f32) {
1156 DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
1157 SDValue MinMax = CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
1158 if (MinMax)
1159 return MinMax;
1160 }
1161
Tom Stellard75aadc22012-12-11 21:25:42 +00001162 // LHS and RHS are guaranteed to be the same value type
1163 EVT CompareVT = LHS.getValueType();
1164
1165 // Check if we can lower this to a native operation.
1166
Tom Stellard2add82d2013-03-08 15:37:09 +00001167 // Try to lower to a SET* instruction:
1168 //
1169 // SET* can match the following patterns:
1170 //
Tom Stellardcd428182013-09-28 02:50:38 +00001171 // select_cc f32, f32, -1, 0, cc_supported
1172 // select_cc f32, f32, 1.0f, 0.0f, cc_supported
1173 // select_cc i32, i32, -1, 0, cc_supported
Tom Stellard2add82d2013-03-08 15:37:09 +00001174 //
1175
1176 // Move hardware True/False values to the correct operand.
Tom Stellardcd428182013-09-28 02:50:38 +00001177 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
1178 ISD::CondCode InverseCC =
1179 ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
Tom Stellard5694d302013-09-28 02:50:43 +00001180 if (isHWTrueValue(False) && isHWFalseValue(True)) {
1181 if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
1182 std::swap(False, True);
1183 CC = DAG.getCondCode(InverseCC);
1184 } else {
1185 ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
1186 if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
1187 std::swap(False, True);
1188 std::swap(LHS, RHS);
1189 CC = DAG.getCondCode(SwapInvCC);
1190 }
1191 }
Tom Stellard2add82d2013-03-08 15:37:09 +00001192 }
1193
1194 if (isHWTrueValue(True) && isHWFalseValue(False) &&
1195 (CompareVT == VT || VT == MVT::i32)) {
1196 // This can be matched by a SET* instruction.
1197 return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
1198 }
1199
Tom Stellard75aadc22012-12-11 21:25:42 +00001200 // Try to lower to a CND* instruction:
Tom Stellard2add82d2013-03-08 15:37:09 +00001201 //
1202 // CND* can match the following patterns:
1203 //
Tom Stellardcd428182013-09-28 02:50:38 +00001204 // select_cc f32, 0.0, f32, f32, cc_supported
1205 // select_cc f32, 0.0, i32, i32, cc_supported
1206 // select_cc i32, 0, f32, f32, cc_supported
1207 // select_cc i32, 0, i32, i32, cc_supported
Tom Stellard2add82d2013-03-08 15:37:09 +00001208 //
Tom Stellardcd428182013-09-28 02:50:38 +00001209
1210 // Try to move the zero value to the RHS
1211 if (isZero(LHS)) {
1212 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
1213 // Try swapping the operands
1214 ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
1215 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
1216 std::swap(LHS, RHS);
1217 CC = DAG.getCondCode(CCSwapped);
1218 } else {
1219 // Try inverting the conditon and then swapping the operands
1220 ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
1221 CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
1222 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
1223 std::swap(True, False);
1224 std::swap(LHS, RHS);
1225 CC = DAG.getCondCode(CCSwapped);
1226 }
1227 }
1228 }
1229 if (isZero(RHS)) {
1230 SDValue Cond = LHS;
1231 SDValue Zero = RHS;
Tom Stellard75aadc22012-12-11 21:25:42 +00001232 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
1233 if (CompareVT != VT) {
1234 // Bitcast True / False to the correct types. This will end up being
1235 // a nop, but it allows us to define only a single pattern in the
1236 // .TD files for each CND* instruction rather than having to have
1237 // one pattern for integer True/False and one for fp True/False
1238 True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
1239 False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
1240 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001241
1242 switch (CCOpcode) {
1243 case ISD::SETONE:
1244 case ISD::SETUNE:
1245 case ISD::SETNE:
Tom Stellard75aadc22012-12-11 21:25:42 +00001246 CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
1247 Temp = True;
1248 True = False;
1249 False = Temp;
1250 break;
1251 default:
1252 break;
1253 }
1254 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
1255 Cond, Zero,
1256 True, False,
1257 DAG.getCondCode(CCOpcode));
1258 return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
1259 }
1260
Tom Stellard75aadc22012-12-11 21:25:42 +00001261 // If we make it this for it means we have no native instructions to handle
1262 // this SELECT_CC, so we must lower it.
1263 SDValue HWTrue, HWFalse;
1264
1265 if (CompareVT == MVT::f32) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001266 HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
1267 HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
Tom Stellard75aadc22012-12-11 21:25:42 +00001268 } else if (CompareVT == MVT::i32) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001269 HWTrue = DAG.getConstant(-1, DL, CompareVT);
1270 HWFalse = DAG.getConstant(0, DL, CompareVT);
Tom Stellard75aadc22012-12-11 21:25:42 +00001271 }
1272 else {
Matt Arsenaulteaa3a7e2013-12-10 21:37:42 +00001273 llvm_unreachable("Unhandled value type in LowerSELECT_CC");
Tom Stellard75aadc22012-12-11 21:25:42 +00001274 }
1275
1276 // Lower this unsupported SELECT_CC into a combination of two supported
1277 // SELECT_CC operations.
1278 SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
1279
1280 return DAG.getNode(ISD::SELECT_CC, DL, VT,
1281 Cond, HWFalse,
1282 True, False,
1283 DAG.getCondCode(ISD::SETNE));
1284}
1285
Alp Tokercb402912014-01-24 17:20:08 +00001286/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001287/// convert these pointers to a register index. Each register holds
1288/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
1289/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
1290/// for indirect addressing.
1291SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1292 unsigned StackWidth,
1293 SelectionDAG &DAG) const {
1294 unsigned SRLPad;
1295 switch(StackWidth) {
1296 case 1:
1297 SRLPad = 2;
1298 break;
1299 case 2:
1300 SRLPad = 3;
1301 break;
1302 case 4:
1303 SRLPad = 4;
1304 break;
1305 default: llvm_unreachable("Invalid stack width");
1306 }
1307
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001308 SDLoc DL(Ptr);
1309 return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
1310 DAG.getConstant(SRLPad, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001311}
1312
1313void R600TargetLowering::getStackAddress(unsigned StackWidth,
1314 unsigned ElemIdx,
1315 unsigned &Channel,
1316 unsigned &PtrIncr) const {
1317 switch (StackWidth) {
1318 default:
1319 case 1:
1320 Channel = 0;
1321 if (ElemIdx > 0) {
1322 PtrIncr = 1;
1323 } else {
1324 PtrIncr = 0;
1325 }
1326 break;
1327 case 2:
1328 Channel = ElemIdx % 2;
1329 if (ElemIdx == 2) {
1330 PtrIncr = 1;
1331 } else {
1332 PtrIncr = 0;
1333 }
1334 break;
1335 case 4:
1336 Channel = ElemIdx;
1337 PtrIncr = 0;
1338 break;
1339 }
1340}
1341
Tom Stellard75aadc22012-12-11 21:25:42 +00001342SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001343 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001344 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1345 SDValue Chain = Op.getOperand(0);
1346 SDValue Value = Op.getOperand(1);
1347 SDValue Ptr = Op.getOperand(2);
1348
Tom Stellard2ffc3302013-08-26 15:05:44 +00001349 SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
Tom Stellardfbab8272013-08-16 01:12:11 +00001350 if (Result.getNode()) {
1351 return Result;
1352 }
1353
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001354 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
1355 if (StoreNode->isTruncatingStore()) {
1356 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001357 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001358 EVT MemVT = StoreNode->getMemoryVT();
1359 SDValue MaskConstant;
1360 if (MemVT == MVT::i8) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001361 MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001362 } else {
1363 assert(MemVT == MVT::i16);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001364 MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001365 }
1366 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001367 DAG.getConstant(2, DL, MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001368 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001369 DAG.getConstant(0x00000003, DL, VT));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001370 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1371 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001372 DAG.getConstant(3, DL, VT));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001373 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1374 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
1375 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1376 // vector instead.
1377 SDValue Src[4] = {
1378 ShiftedValue,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001379 DAG.getConstant(0, DL, MVT::i32),
1380 DAG.getConstant(0, DL, MVT::i32),
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001381 Mask
1382 };
Craig Topper48d114b2014-04-26 18:35:24 +00001383 SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001384 SDValue Args[3] = { Chain, Input, DWordAddr };
1385 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
Craig Topper206fcd42014-04-26 19:29:41 +00001386 Op->getVTList(), Args, MemVT,
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001387 StoreNode->getMemOperand());
1388 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
1389 Value.getValueType().bitsGE(MVT::i32)) {
1390 // Convert pointer from byte address to dword address.
1391 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1392 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001393 Ptr, DAG.getConstant(2, DL, MVT::i32)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001394
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001395 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
Matt Arsenaulteaa3a7e2013-12-10 21:37:42 +00001396 llvm_unreachable("Truncated and indexed stores not supported yet");
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001397 } else {
1398 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1399 }
1400 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001401 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001402 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001403
1404 EVT ValueVT = Value.getValueType();
1405
1406 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1407 return SDValue();
1408 }
1409
Tom Stellarde9373602014-01-22 19:24:14 +00001410 SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
1411 if (Ret.getNode()) {
1412 return Ret;
1413 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001414 // Lowering for indirect addressing
1415
1416 const MachineFunction &MF = DAG.getMachineFunction();
Eric Christopher7792e322015-01-30 23:24:40 +00001417 const AMDGPUFrameLowering *TFL =
1418 static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001419 unsigned StackWidth = TFL->getStackWidth(MF);
1420
1421 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1422
1423 if (ValueVT.isVector()) {
1424 unsigned NumElemVT = ValueVT.getVectorNumElements();
1425 EVT ElemVT = ValueVT.getVectorElementType();
Craig Topper48d114b2014-04-26 18:35:24 +00001426 SmallVector<SDValue, 4> Stores(NumElemVT);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001427
1428 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1429 "vector width in load");
1430
1431 for (unsigned i = 0; i < NumElemVT; ++i) {
1432 unsigned Channel, PtrIncr;
1433 getStackAddress(StackWidth, i, Channel, PtrIncr);
1434 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001435 DAG.getConstant(PtrIncr, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001436 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001437 Value, DAG.getConstant(i, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001438
1439 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1440 Chain, Elem, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001441 DAG.getTargetConstant(Channel, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001442 }
Craig Topper48d114b2014-04-26 18:35:24 +00001443 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001444 } else {
1445 if (ValueVT == MVT::i8) {
1446 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1447 }
1448 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001449 DAG.getTargetConstant(0, DL, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001450 }
1451
1452 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001453}
1454
Tom Stellard365366f2013-01-23 02:09:06 +00001455// return (512 + (kc_bank << 12)
1456static int
1457ConstantAddressBlock(unsigned AddressSpace) {
1458 switch (AddressSpace) {
1459 case AMDGPUAS::CONSTANT_BUFFER_0:
1460 return 512;
1461 case AMDGPUAS::CONSTANT_BUFFER_1:
1462 return 512 + 4096;
1463 case AMDGPUAS::CONSTANT_BUFFER_2:
1464 return 512 + 4096 * 2;
1465 case AMDGPUAS::CONSTANT_BUFFER_3:
1466 return 512 + 4096 * 3;
1467 case AMDGPUAS::CONSTANT_BUFFER_4:
1468 return 512 + 4096 * 4;
1469 case AMDGPUAS::CONSTANT_BUFFER_5:
1470 return 512 + 4096 * 5;
1471 case AMDGPUAS::CONSTANT_BUFFER_6:
1472 return 512 + 4096 * 6;
1473 case AMDGPUAS::CONSTANT_BUFFER_7:
1474 return 512 + 4096 * 7;
1475 case AMDGPUAS::CONSTANT_BUFFER_8:
1476 return 512 + 4096 * 8;
1477 case AMDGPUAS::CONSTANT_BUFFER_9:
1478 return 512 + 4096 * 9;
1479 case AMDGPUAS::CONSTANT_BUFFER_10:
1480 return 512 + 4096 * 10;
1481 case AMDGPUAS::CONSTANT_BUFFER_11:
1482 return 512 + 4096 * 11;
1483 case AMDGPUAS::CONSTANT_BUFFER_12:
1484 return 512 + 4096 * 12;
1485 case AMDGPUAS::CONSTANT_BUFFER_13:
1486 return 512 + 4096 * 13;
1487 case AMDGPUAS::CONSTANT_BUFFER_14:
1488 return 512 + 4096 * 14;
1489 case AMDGPUAS::CONSTANT_BUFFER_15:
1490 return 512 + 4096 * 15;
1491 default:
1492 return -1;
1493 }
1494}
1495
// Custom lowering for ISD::LOAD. Dispatches, in order:
//  1. generic AMDGPU lowering,
//  2. loads of constant-address-space global variables (via REGISTER_LOAD),
//  3. vector loads from local memory (scalarized),
//  4. constant-buffer loads (folded to CONST_ADDRESS when the pointer is
//     constant),
//  5. SEXT loads (expanded to EXTLOAD + SIGN_EXTEND_INREG),
//  6. private (stack) loads via indirect addressing.
// Anything else is returned as SDValue() for the legalizer.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  // First give the generic AMDGPU lowering a chance; if it produced a node,
  // pair it with the original chain.
  SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
  if (Ret.getNode()) {
    SDValue Ops[2] = {
      Ret,
      Chain
    };
    return DAG.getMergeValues(Ops, DL);
  }

  // Lower loads constant address space global variable loads
  if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
      isa<GlobalVariable>(GetUnderlyingObject(
      LoadNode->getMemOperand()->getValue(), *getDataLayout()))) {

    // Rewrite the byte pointer as a dword index into the private address
    // space (SRL by 2 == divide by 4).
    SDValue Ptr = DAG.getZExtOrTrunc(LoadNode->getBasePtr(), DL,
        getPointerTy(AMDGPUAS::PRIVATE_ADDRESS));
    Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
        DAG.getConstant(2, DL, MVT::i32));
    return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op->getVTList(),
                       LoadNode->getChain(), Ptr,
                       DAG.getTargetConstant(0, DL, MVT::i32),
                       Op.getOperand(2));
  }

  // Vector loads from local memory are split into scalar loads.
  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      ScalarizeVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Constant-buffer loads (non-extending or zero-extending only).
  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
        isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      // Scalar loads still build a v4i32 and extract element 0 below.
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT,
                           makeArrayRef(Slots, NumElements));
    } else {
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
                      DAG.getConstant(4, DL, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
          );
    }

    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, DL, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    // Expand as: any-extending load followed by an explicit in-register
    // sign extension from the memory type.
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                     LoadNode->getPointerInfo(), MemVT,
                                     LoadNode->isVolatile(),
                                     LoadNode->isNonTemporal(),
                                     LoadNode->isInvariant(),
                                     LoadNode->getAlignment());
    SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
                              DAG.getValueType(MemVT));

    SDValue MergedValues[2] = { Res, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL =
      static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  // Convert the byte-based stack pointer into a register index.
  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    // One REGISTER_LOAD per element; getStackAddress picks the channel and
    // the pointer increment for the configured stack width.
    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, DL, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, DL, MVT::i32),
                             Op.getOperand(2));
    }
    // Pad the remaining lanes with undef up to a full 4-wide vector.
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, DL, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2] = {
    LoweredLoad,
    Chain
  };

  return DAG.getMergeValues(Ops, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001658
Matt Arsenault1d555c42014-06-23 18:00:55 +00001659SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1660 SDValue Chain = Op.getOperand(0);
1661 SDValue Cond = Op.getOperand(1);
1662 SDValue Jump = Op.getOperand(2);
1663
1664 return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1665 Chain, Jump, Cond);
1666}
1667
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
///
/// Compute-shader arguments are materialized as loads from the kernel input
/// buffer (CONSTANT_BUFFER_0); graphics-shader arguments live in registers.
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      SDLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();

  SmallVector<ISD::InputArg, 8> LocalIns;

  // Re-derive the argument list from the IR function so that CC analysis
  // sees the original (pre-legalization) argument types.
  getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);

  AnalyzeFormalArguments(CCInfo, LocalIns);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    const ISD::InputArg &In = Ins[i];
    EVT VT = In.VT;
    EVT MemVT = VA.getLocVT();
    if (!VT.isVector() && MemVT.isVector()) {
      // Get load source type if scalarized.
      MemVT = MemVT.getVectorElementType();
    }

    // Non-compute (graphics) shaders receive arguments in 128-bit registers.
    if (MFI->getShaderType() != ShaderType::COMPUTE) {
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
    // being invalid. Somehow this seems to work with i64 arguments, but breaks
    // for <1 x i64>.

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.
    ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
    if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
      // FIXME: This should really check the extload type, but the handling of
      // extload vector parameters seems to be broken.

      // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
      Ext = ISD::SEXTLOAD;
    }

    // Compute the offset from the value.
    // XXX - I think PartOffset should give you this, but it seems to give the
    // size of the register which isn't useful.

    // ValBase: buffer offset of the first piece of this original IR argument;
    // PartOffset - ValBase is the byte offset of this piece within it.
    unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
    unsigned PartOffset = VA.getLocMemOffset();
    // 36-byte bias skips the dispatch information at the start of the buffer.
    unsigned Offset = 36 + VA.getLocMemOffset();

    MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
    SDValue Arg = DAG.getLoad(ISD::UNINDEXED, Ext, VT, DL, Chain,
                              DAG.getConstant(Offset, DL, MVT::i32),
                              DAG.getUNDEF(MVT::i32),
                              PtrInfo,
                              MemVT, false, true, true, 4);

    // 4 is the preferred alignment for the CONSTANT memory space.
    InVals.push_back(Arg);
    // Record where the next ABI argument would start, for runtime metadata.
    MFI->ABIArgOffset = Offset + MemVT.getStoreSize();
  }
  return Chain;
}
1747
Matt Arsenault758659232013-05-18 00:21:46 +00001748EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001749 if (!VT.isVector())
1750 return MVT::i32;
Tom Stellard75aadc22012-12-11 21:25:42 +00001751 return VT.changeVectorElementTypeToInteger();
1752}
1753
// Fold "swizzlable" elements out of a BUILD_VECTOR: undef lanes become
// mask-write selectors, constant 0.0/1.0 lanes become the hardware SEL_0 /
// SEL_1 selectors, and duplicated lanes are re-read from their first
// occurrence. Folded lanes are replaced by undef in the returned vector, and
// RemapSwizzle records the old-lane -> new-selector mapping for the caller.
static SDValue CompactSwizzlableVector(
  SelectionDAG &DAG, SDValue VectorEntry,
  DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      // We mask write here to teach later passes that the ith element of this
      // vector is undef. Thus we can use it to reduce 128 bits reg usage,
      // break false dependencies and additionally make assembly easier to read.
      RemapSwizzle[i] = 7; // SEL_MASK_WRITE
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    // Lanes already folded to undef (originally undef, or constant 0/1
    // handled above) take no part in duplicate detection.
    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      continue;
    // Duplicate of an earlier lane: drop this lane and reselect lane j.
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec);
}
1796
Benjamin Kramer193960c2013-06-11 13:32:25 +00001797static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1798 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001799 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1800 assert(RemapSwizzle.empty());
1801 SDValue NewBldVec[4] = {
1802 VectorEntry.getOperand(0),
1803 VectorEntry.getOperand(1),
1804 VectorEntry.getOperand(2),
1805 VectorEntry.getOperand(3)
1806 };
1807 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001808 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001809 RemapSwizzle[i] = i;
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001810 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1811 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1812 ->getZExtValue();
1813 if (i == Idx)
1814 isUnmovable[Idx] = true;
1815 }
1816 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001817
1818 for (unsigned i = 0; i < 4; i++) {
1819 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1820 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1821 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001822 if (isUnmovable[Idx])
1823 continue;
1824 // Swap i and Idx
1825 std::swap(NewBldVec[Idx], NewBldVec[i]);
1826 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1827 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001828 }
1829 }
1830
1831 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001832 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001833}
1834
1835
1836SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001837 SDValue Swz[4], SelectionDAG &DAG,
1838 SDLoc DL) const {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001839 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1840 // Old -> New swizzle values
1841 DenseMap<unsigned, unsigned> SwizzleRemap;
1842
1843 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1844 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001845 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001846 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001847 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001848 }
1849
1850 SwizzleRemap.clear();
1851 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1852 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001853 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001854 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001855 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001856 }
1857
1858 return BuildVector;
1859}
1860
1861
Tom Stellard75aadc22012-12-11 21:25:42 +00001862//===----------------------------------------------------------------------===//
1863// Custom DAG Optimizations
1864//===----------------------------------------------------------------------===//
1865
1866SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1867 DAGCombinerInfo &DCI) const {
1868 SelectionDAG &DAG = DCI.DAG;
1869
1870 switch (N->getOpcode()) {
Tom Stellard50122a52014-04-07 19:45:41 +00001871 default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00001872 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1873 case ISD::FP_ROUND: {
1874 SDValue Arg = N->getOperand(0);
1875 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001876 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001877 Arg.getOperand(0));
1878 }
1879 break;
1880 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001881
1882 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1883 // (i32 select_cc f32, f32, -1, 0 cc)
1884 //
1885 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1886 // this to one of the SET*_DX10 instructions.
1887 case ISD::FP_TO_SINT: {
1888 SDValue FNeg = N->getOperand(0);
1889 if (FNeg.getOpcode() != ISD::FNEG) {
1890 return SDValue();
1891 }
1892 SDValue SelectCC = FNeg.getOperand(0);
1893 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1894 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1895 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1896 !isHWTrueValue(SelectCC.getOperand(2)) ||
1897 !isHWFalseValue(SelectCC.getOperand(3))) {
1898 return SDValue();
1899 }
1900
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001901 SDLoc dl(N);
1902 return DAG.getNode(ISD::SELECT_CC, dl, N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001903 SelectCC.getOperand(0), // LHS
1904 SelectCC.getOperand(1), // RHS
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001905 DAG.getConstant(-1, dl, MVT::i32), // True
1906 DAG.getConstant(0, dl, MVT::i32), // False
Tom Stellarde06163a2013-02-07 14:02:35 +00001907 SelectCC.getOperand(4)); // CC
1908
1909 break;
1910 }
Quentin Colombete2e05482013-07-30 00:27:16 +00001911
NAKAMURA Takumi8a046432013-10-28 04:07:38 +00001912 // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1913 // => build_vector elt0, ... , NewEltIdx, ... , eltN
Quentin Colombete2e05482013-07-30 00:27:16 +00001914 case ISD::INSERT_VECTOR_ELT: {
1915 SDValue InVec = N->getOperand(0);
1916 SDValue InVal = N->getOperand(1);
1917 SDValue EltNo = N->getOperand(2);
1918 SDLoc dl(N);
1919
1920 // If the inserted element is an UNDEF, just use the input vector.
1921 if (InVal.getOpcode() == ISD::UNDEF)
1922 return InVec;
1923
1924 EVT VT = InVec.getValueType();
1925
1926 // If we can't generate a legal BUILD_VECTOR, exit
1927 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1928 return SDValue();
1929
1930 // Check that we know which element is being inserted
1931 if (!isa<ConstantSDNode>(EltNo))
1932 return SDValue();
1933 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1934
1935 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1936 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1937 // vector elements.
1938 SmallVector<SDValue, 8> Ops;
1939 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1940 Ops.append(InVec.getNode()->op_begin(),
1941 InVec.getNode()->op_end());
1942 } else if (InVec.getOpcode() == ISD::UNDEF) {
1943 unsigned NElts = VT.getVectorNumElements();
1944 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1945 } else {
1946 return SDValue();
1947 }
1948
1949 // Insert the element
1950 if (Elt < Ops.size()) {
1951 // All the operands of BUILD_VECTOR must have the same type;
1952 // we enforce that here.
1953 EVT OpVT = Ops[0].getValueType();
1954 if (InVal.getValueType() != OpVT)
1955 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1956 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
1957 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
1958 Ops[Elt] = InVal;
1959 }
1960
1961 // Return the new vector
Craig Topper48d114b2014-04-26 18:35:24 +00001962 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
Quentin Colombete2e05482013-07-30 00:27:16 +00001963 }
1964
Tom Stellard365366f2013-01-23 02:09:06 +00001965 // Extract_vec (Build_vector) generated by custom lowering
1966 // also needs to be customly combined
1967 case ISD::EXTRACT_VECTOR_ELT: {
1968 SDValue Arg = N->getOperand(0);
1969 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1970 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1971 unsigned Element = Const->getZExtValue();
1972 return Arg->getOperand(Element);
1973 }
1974 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001975 if (Arg.getOpcode() == ISD::BITCAST &&
1976 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1977 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1978 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001979 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00001980 Arg->getOperand(0).getOperand(Element));
1981 }
1982 }
Tom Stellard365366f2013-01-23 02:09:06 +00001983 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001984
1985 case ISD::SELECT_CC: {
Tom Stellardafa8b532014-05-09 16:42:16 +00001986 // Try common optimizations
1987 SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
1988 if (Ret.getNode())
1989 return Ret;
1990
Tom Stellarde06163a2013-02-07 14:02:35 +00001991 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1992 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001993 //
1994 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1995 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001996 SDValue LHS = N->getOperand(0);
1997 if (LHS.getOpcode() != ISD::SELECT_CC) {
1998 return SDValue();
1999 }
2000
2001 SDValue RHS = N->getOperand(1);
2002 SDValue True = N->getOperand(2);
2003 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00002004 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00002005
2006 if (LHS.getOperand(2).getNode() != True.getNode() ||
2007 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00002008 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00002009 return SDValue();
2010 }
2011
Tom Stellard5e524892013-03-08 15:37:11 +00002012 switch (NCC) {
2013 default: return SDValue();
2014 case ISD::SETNE: return LHS;
2015 case ISD::SETEQ: {
2016 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
2017 LHSCC = ISD::getSetCCInverse(LHSCC,
2018 LHS.getOperand(0).getValueType().isInteger());
Tom Stellardcd428182013-09-28 02:50:38 +00002019 if (DCI.isBeforeLegalizeOps() ||
2020 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
2021 return DAG.getSelectCC(SDLoc(N),
2022 LHS.getOperand(0),
2023 LHS.getOperand(1),
2024 LHS.getOperand(2),
2025 LHS.getOperand(3),
2026 LHSCC);
2027 break;
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002028 }
Tom Stellard5e524892013-03-08 15:37:11 +00002029 }
Tom Stellardcd428182013-09-28 02:50:38 +00002030 return SDValue();
Tom Stellard5e524892013-03-08 15:37:11 +00002031 }
Tom Stellardfbab8272013-08-16 01:12:11 +00002032
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002033 case AMDGPUISD::EXPORT: {
2034 SDValue Arg = N->getOperand(1);
2035 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2036 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002037
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002038 SDValue NewArgs[8] = {
2039 N->getOperand(0), // Chain
2040 SDValue(),
2041 N->getOperand(2), // ArrayBase
2042 N->getOperand(3), // Type
2043 N->getOperand(4), // SWZ_X
2044 N->getOperand(5), // SWZ_Y
2045 N->getOperand(6), // SWZ_Z
2046 N->getOperand(7) // SWZ_W
2047 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00002048 SDLoc DL(N);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002049 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
Craig Topper48d114b2014-04-26 18:35:24 +00002050 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
Tom Stellarde06163a2013-02-07 14:02:35 +00002051 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002052 case AMDGPUISD::TEXTURE_FETCH: {
2053 SDValue Arg = N->getOperand(1);
2054 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2055 break;
2056
2057 SDValue NewArgs[19] = {
2058 N->getOperand(0),
2059 N->getOperand(1),
2060 N->getOperand(2),
2061 N->getOperand(3),
2062 N->getOperand(4),
2063 N->getOperand(5),
2064 N->getOperand(6),
2065 N->getOperand(7),
2066 N->getOperand(8),
2067 N->getOperand(9),
2068 N->getOperand(10),
2069 N->getOperand(11),
2070 N->getOperand(12),
2071 N->getOperand(13),
2072 N->getOperand(14),
2073 N->getOperand(15),
2074 N->getOperand(16),
2075 N->getOperand(17),
2076 N->getOperand(18),
2077 };
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002078 SDLoc DL(N);
2079 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
2080 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002081 }
Tom Stellard75aadc22012-12-11 21:25:42 +00002082 }
Matt Arsenault5565f65e2014-05-22 18:09:07 +00002083
2084 return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00002085}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002086
2087static bool
2088FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002089 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Eric Christopherfc6de422014-08-05 02:39:49 +00002090 const R600InstrInfo *TII =
2091 static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002092 if (!Src.isMachineOpcode())
2093 return false;
2094 switch (Src.getMachineOpcode()) {
2095 case AMDGPU::FNEG_R600:
2096 if (!Neg.getNode())
2097 return false;
2098 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002099 Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002100 return true;
2101 case AMDGPU::FABS_R600:
2102 if (!Abs.getNode())
2103 return false;
2104 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002105 Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002106 return true;
2107 case AMDGPU::CONST_COPY: {
2108 unsigned Opcode = ParentNode->getMachineOpcode();
2109 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2110
2111 if (!Sel.getNode())
2112 return false;
2113
2114 SDValue CstOffset = Src.getOperand(0);
2115 if (ParentNode->getValueType(0).isVector())
2116 return false;
2117
2118 // Gather constants values
2119 int SrcIndices[] = {
2120 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2121 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2122 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
2123 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2124 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2125 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2126 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2127 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2128 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2129 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2130 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
2131 };
2132 std::vector<unsigned> Consts;
Matt Arsenault4d64f962014-05-12 19:23:21 +00002133 for (int OtherSrcIdx : SrcIndices) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002134 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2135 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2136 continue;
2137 if (HasDst) {
2138 OtherSrcIdx--;
2139 OtherSelIdx--;
2140 }
2141 if (RegisterSDNode *Reg =
2142 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2143 if (Reg->getReg() == AMDGPU::ALU_CONST) {
Matt Arsenaultb3ee3882014-05-12 19:26:38 +00002144 ConstantSDNode *Cst
2145 = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002146 Consts.push_back(Cst->getZExtValue());
2147 }
2148 }
2149 }
2150
Matt Arsenault37c12d72014-05-12 20:42:57 +00002151 ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002152 Consts.push_back(Cst->getZExtValue());
2153 if (!TII->fitsConstReadLimitations(Consts)) {
2154 return false;
2155 }
2156
2157 Sel = CstOffset;
2158 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
2159 return true;
2160 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002161 case AMDGPU::MOV_IMM_I32:
2162 case AMDGPU::MOV_IMM_F32: {
2163 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
2164 uint64_t ImmValue = 0;
2165
2166
2167 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
2168 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
2169 float FloatValue = FPC->getValueAPF().convertToFloat();
2170 if (FloatValue == 0.0) {
2171 ImmReg = AMDGPU::ZERO;
2172 } else if (FloatValue == 0.5) {
2173 ImmReg = AMDGPU::HALF;
2174 } else if (FloatValue == 1.0) {
2175 ImmReg = AMDGPU::ONE;
2176 } else {
2177 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2178 }
2179 } else {
2180 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
2181 uint64_t Value = C->getZExtValue();
2182 if (Value == 0) {
2183 ImmReg = AMDGPU::ZERO;
2184 } else if (Value == 1) {
2185 ImmReg = AMDGPU::ONE_INT;
2186 } else {
2187 ImmValue = Value;
2188 }
2189 }
2190
2191 // Check that we aren't already using an immediate.
2192 // XXX: It's possible for an instruction to have more than one
2193 // immediate operand, but this is not supported yet.
2194 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
2195 if (!Imm.getNode())
2196 return false;
2197 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
2198 assert(C);
2199 if (C->getZExtValue())
2200 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002201 Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002202 }
2203 Src = DAG.getRegister(ImmReg, MVT::i32);
2204 return true;
2205 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002206 default:
2207 return false;
2208 }
2209}
2210
2211
2212/// \brief Fold the instructions after selecting them
2213SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2214 SelectionDAG &DAG) const {
Eric Christopherfc6de422014-08-05 02:39:49 +00002215 const R600InstrInfo *TII =
2216 static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002217 if (!Node->isMachineOpcode())
2218 return Node;
2219 unsigned Opcode = Node->getMachineOpcode();
2220 SDValue FakeOp;
2221
Benjamin Kramer6cd780f2015-02-17 15:29:18 +00002222 std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002223
2224 if (Opcode == AMDGPU::DOT_4) {
2225 int OperandIdx[] = {
2226 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2227 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2228 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2229 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2230 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2231 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2232 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2233 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00002234 };
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002235 int NegIdx[] = {
2236 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
2237 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
2238 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
2239 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
2240 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
2241 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
2242 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
2243 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
2244 };
2245 int AbsIdx[] = {
2246 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
2247 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
2248 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
2249 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
2250 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
2251 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
2252 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
2253 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
2254 };
2255 for (unsigned i = 0; i < 8; i++) {
2256 if (OperandIdx[i] < 0)
2257 return Node;
2258 SDValue &Src = Ops[OperandIdx[i] - 1];
2259 SDValue &Neg = Ops[NegIdx[i] - 1];
2260 SDValue &Abs = Ops[AbsIdx[i] - 1];
2261 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2262 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2263 if (HasDst)
2264 SelIdx--;
2265 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002266 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2267 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2268 }
2269 } else if (Opcode == AMDGPU::REG_SEQUENCE) {
2270 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2271 SDValue &Src = Ops[i];
2272 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002273 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2274 }
Vincent Lejeune0167a312013-09-12 23:45:00 +00002275 } else if (Opcode == AMDGPU::CLAMP_R600) {
2276 SDValue Src = Node->getOperand(0);
2277 if (!Src.isMachineOpcode() ||
2278 !TII->hasInstrModifiers(Src.getMachineOpcode()))
2279 return Node;
2280 int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
2281 AMDGPU::OpName::clamp);
2282 if (ClampIdx < 0)
2283 return Node;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002284 SDLoc DL(Node);
Benjamin Kramer6cd780f2015-02-17 15:29:18 +00002285 std::vector<SDValue> Ops(Src->op_begin(), Src->op_end());
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002286 Ops[ClampIdx - 1] = DAG.getTargetConstant(1, DL, MVT::i32);
2287 return DAG.getMachineNode(Src.getMachineOpcode(), DL,
2288 Node->getVTList(), Ops);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002289 } else {
2290 if (!TII->hasInstrModifiers(Opcode))
2291 return Node;
2292 int OperandIdx[] = {
2293 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2294 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2295 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
2296 };
2297 int NegIdx[] = {
2298 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
2299 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
2300 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
2301 };
2302 int AbsIdx[] = {
2303 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
2304 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
2305 -1
2306 };
2307 for (unsigned i = 0; i < 3; i++) {
2308 if (OperandIdx[i] < 0)
2309 return Node;
2310 SDValue &Src = Ops[OperandIdx[i] - 1];
2311 SDValue &Neg = Ops[NegIdx[i] - 1];
2312 SDValue FakeAbs;
2313 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2314 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2315 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002316 int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
2317 if (HasDst) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002318 SelIdx--;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002319 ImmIdx--;
2320 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002321 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002322 SDValue &Imm = Ops[ImmIdx];
2323 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002324 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2325 }
2326 }
2327
2328 return Node;
2329}