blob: 73109e73ee06a806a0e09ecca523dccb92cfb698 [file] [log] [blame]
Tom Stellard75aadc22012-12-11 21:25:42 +00001//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Custom DAG lowering for R600
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
16#include "R600Defines.h"
17#include "R600InstrInfo.h"
18#include "R600MachineFunctionInfo.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000019#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000020#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000021#include "llvm/CodeGen/MachineInstrBuilder.h"
22#include "llvm/CodeGen/MachineRegisterInfo.h"
23#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000024#include "llvm/IR/Argument.h"
25#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000026
27using namespace llvm;
28
29R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000030 AMDGPUTargetLowering(TM),
31 Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000032 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
33 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
34 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
35 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000036 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
37 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
38
Tom Stellard75aadc22012-12-11 21:25:42 +000039 computeRegisterProperties();
40
Tom Stellard0351ea22013-09-28 02:50:50 +000041 // Set condition code actions
42 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
43 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000044 setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000045 setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000046 setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
47 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000048 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
49 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
50 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
51 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000052 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
53 setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
54
55 setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
56 setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
57 setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
58 setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
59
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000060 setOperationAction(ISD::FCOS, MVT::f32, Custom);
61 setOperationAction(ISD::FSIN, MVT::f32, Custom);
62
Tom Stellard75aadc22012-12-11 21:25:42 +000063 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000064 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000065
Tom Stellard492ebea2013-03-08 15:37:07 +000066 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
67 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000068
69 setOperationAction(ISD::FSUB, MVT::f32, Expand);
70
71 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
72 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
73 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000074
Tom Stellard75aadc22012-12-11 21:25:42 +000075 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
76 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
77
Tom Stellarde8f9f282013-03-08 15:37:05 +000078 setOperationAction(ISD::SETCC, MVT::i32, Expand);
79 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000080 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
81
Tom Stellard53f2f902013-09-05 18:38:03 +000082 setOperationAction(ISD::SELECT, MVT::i32, Expand);
83 setOperationAction(ISD::SELECT, MVT::f32, Expand);
84 setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
85 setOperationAction(ISD::SELECT, MVT::v2f32, Expand);
86 setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
87 setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000088
Matt Arsenault4e466652014-04-16 01:41:30 +000089 // Expand sign extension of vectors
90 if (!Subtarget->hasBFE())
91 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
92
93 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
94 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);
95
96 if (!Subtarget->hasBFE())
97 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
98 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
99 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);
100
101 if (!Subtarget->hasBFE())
102 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
103 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
104 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);
105
106 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
107 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
108 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);
109
110 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
111
112
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000113 // Legalize loads and stores to the private address space.
114 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000115 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000116 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000117
118 // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
119 // spaces, so it is custom lowered to handle those where it isn't.
Tom Stellard1e803092013-07-23 01:48:18 +0000120 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
121 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
122 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
123 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000124 setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
125 setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
126
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000127 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000128 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000129 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000130 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
Tom Stellardd3ee8c12013-08-16 01:12:06 +0000131 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
132 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000133
Tom Stellard365366f2013-01-23 02:09:06 +0000134 setOperationAction(ISD::LOAD, MVT::i32, Custom);
135 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000136 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
137
Tom Stellard75aadc22012-12-11 21:25:42 +0000138 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +0000139 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +0000140 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +0000141 setTargetDAGCombine(ISD::SELECT_CC);
Quentin Colombete2e05482013-07-30 00:27:16 +0000142 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000143
Tom Stellard5f337882014-04-29 23:12:43 +0000144 // These should be replaced by UDVIREM, but it does not happen automatically
145 // during Type Legalization
146 setOperationAction(ISD::UDIV, MVT::i64, Custom);
147 setOperationAction(ISD::UREM, MVT::i64, Custom);
148
Michel Danzer49812b52013-07-10 16:37:07 +0000149 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
150
Tom Stellardb852af52013-03-08 15:37:03 +0000151 setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard87047f62013-04-24 23:56:18 +0000152 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
Tom Stellardfc455472013-08-12 22:33:21 +0000153 setSchedulingPreference(Sched::Source);
Tom Stellard75aadc22012-12-11 21:25:42 +0000154}
155
/// \brief Expand R600 pseudo-instructions that require custom MachineInstr
/// insertion.
///
/// Handles flag-carrying moves (clamp/abs/neg), immediate moves, constant
/// copies, RAT writes, texture sampling with derivatives (TXD/TXD_SHADOW),
/// branches, exports, and RETURN live-outs. Unrecognized opcodes are either
/// rewritten from LDS_*_RET to LDS_*_NORET (when the result is unused) or
/// delegated to AMDGPUTargetLowering. Unless a case returns early, the
/// original pseudo is erased before returning.
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
          static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // Replace LDS_*_RET instruction that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI->getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // The destination is still used: keep the _RET form untouched.
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()))
        return BB;

      // Rebuild with the NORET opcode, copying every operand except dst
      // (operand 0 of the original instruction).
      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  case AMDGPU::CLAMP_R600: {
    // Lower to a plain MOV carrying the CLAMP output modifier flag.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                   AMDGPU::MOV,
                                                   MI->getOperand(0).getReg(),
                                                   MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    // Lower to a MOV with the absolute-value source modifier.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    // Lower to a MOV with the negate source modifier.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    // Do not emit an instruction; instead mark the defining instruction of
    // the masked register so its write is suppressed.
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    // Materialize the FP immediate via its raw bit pattern.
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    // MOV from the constant file; the constant slot is encoded in the
    // src0_sel immediate operand.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
        MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    // If the next instruction is the function's RETURN, this write is the
    // last one and must carry the End-Of-Program bit.
    unsigned EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    // Texture sample with explicit derivatives: load the H and V gradients
    // into temporaries, then issue the gradient-sample instruction with both
    // temporaries as implicit uses.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    // Default swizzle (x,y,z,w) and coordinate-type flags; adjusted per
    // texture target below.
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            // Keep the gradient setters alive and ordered before the sample.
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    // Same expansion as TXD but emits the shadow-comparison sample
    // (TEX_SAMPLE_C_G) instead of TEX_SAMPLE_G.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    // Evaluate the f32 condition into PREDICATE_BIT with PRED_X, push the
    // predicate (MO_FLAG_PUSH), then branch on it with JUMP_COND.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    // Same as the f32 variant but with the integer is-not-zero comparison.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO_INT)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if its not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    // Scan the rest of the block for a later export of the same type.
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    // The export immediately before RETURN carries the End-Of-Program bit.
    bool EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // CF instruction encoding differs between Evergreen (84) and R600 (40).
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  // The pseudo has been expanded (or rewritten); remove the original.
  MI->eraseFromParent();
  return BB;
}
532
533//===----------------------------------------------------------------------===//
534// Custom DAG Lowering Operations
535//===----------------------------------------------------------------------===//
536
Tom Stellard75aadc22012-12-11 21:25:42 +0000537SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000538 MachineFunction &MF = DAG.getMachineFunction();
539 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard75aadc22012-12-11 21:25:42 +0000540 switch (Op.getOpcode()) {
541 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000542 case ISD::FCOS:
543 case ISD::FSIN: return LowerTrig(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000544 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000545 case ISD::STORE: return LowerSTORE(Op, DAG);
Tom Stellard365366f2013-01-23 02:09:06 +0000546 case ISD::LOAD: return LowerLOAD(Op, DAG);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000547 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000548 case ISD::INTRINSIC_VOID: {
549 SDValue Chain = Op.getOperand(0);
550 unsigned IntrinsicID =
551 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
552 switch (IntrinsicID) {
553 case AMDGPUIntrinsic::AMDGPU_store_output: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000554 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
555 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000556 MFI->LiveOuts.push_back(Reg);
Andrew Trickef9de2a2013-05-25 02:42:55 +0000557 return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
Tom Stellard75aadc22012-12-11 21:25:42 +0000558 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000559 case AMDGPUIntrinsic::R600_store_swizzle: {
560 const SDValue Args[8] = {
561 Chain,
562 Op.getOperand(2), // Export Value
563 Op.getOperand(3), // ArrayBase
564 Op.getOperand(4), // Type
565 DAG.getConstant(0, MVT::i32), // SWZ_X
566 DAG.getConstant(1, MVT::i32), // SWZ_Y
567 DAG.getConstant(2, MVT::i32), // SWZ_Z
568 DAG.getConstant(3, MVT::i32) // SWZ_W
569 };
Craig Topper48d114b2014-04-26 18:35:24 +0000570 return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(), Args);
Tom Stellard75aadc22012-12-11 21:25:42 +0000571 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000572
Tom Stellard75aadc22012-12-11 21:25:42 +0000573 // default for switch(IntrinsicID)
574 default: break;
575 }
576 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
577 break;
578 }
579 case ISD::INTRINSIC_WO_CHAIN: {
580 unsigned IntrinsicID =
581 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
582 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000583 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000584 switch(IntrinsicID) {
585 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000586 case AMDGPUIntrinsic::R600_load_input: {
587 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
588 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
589 MachineFunction &MF = DAG.getMachineFunction();
590 MachineRegisterInfo &MRI = MF.getRegInfo();
591 MRI.addLiveIn(Reg);
592 return DAG.getCopyFromReg(DAG.getEntryNode(),
593 SDLoc(DAG.getEntryNode()), Reg, VT);
594 }
595
596 case AMDGPUIntrinsic::R600_interp_input: {
597 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
598 int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
599 MachineSDNode *interp;
600 if (ijb < 0) {
601 const MachineFunction &MF = DAG.getMachineFunction();
602 const R600InstrInfo *TII =
603 static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
604 interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
605 MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
606 return DAG.getTargetExtractSubreg(
607 TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
608 DL, MVT::f32, SDValue(interp, 0));
609 }
610 MachineFunction &MF = DAG.getMachineFunction();
611 MachineRegisterInfo &MRI = MF.getRegInfo();
612 unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
613 unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
614 MRI.addLiveIn(RegisterI);
615 MRI.addLiveIn(RegisterJ);
616 SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
617 SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
618 SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
619 SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);
620
621 if (slot % 4 < 2)
622 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
623 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
624 RegisterJNode, RegisterINode);
625 else
626 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
627 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
628 RegisterJNode, RegisterINode);
629 return SDValue(interp, slot % 2);
630 }
Vincent Lejeunef143af32013-11-11 22:10:24 +0000631 case AMDGPUIntrinsic::R600_interp_xy:
632 case AMDGPUIntrinsic::R600_interp_zw: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000633 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Tom Stellard41afe6a2013-02-05 17:09:14 +0000634 MachineSDNode *interp;
Vincent Lejeunef143af32013-11-11 22:10:24 +0000635 SDValue RegisterINode = Op.getOperand(2);
636 SDValue RegisterJNode = Op.getOperand(3);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000637
Vincent Lejeunef143af32013-11-11 22:10:24 +0000638 if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
Tom Stellard41afe6a2013-02-05 17:09:14 +0000639 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
Vincent Lejeunef143af32013-11-11 22:10:24 +0000640 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000641 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000642 else
643 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
Vincent Lejeunef143af32013-11-11 22:10:24 +0000644 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000645 RegisterJNode, RegisterINode);
Vincent Lejeunef143af32013-11-11 22:10:24 +0000646 return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
647 SDValue(interp, 0), SDValue(interp, 1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000648 }
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000649 case AMDGPUIntrinsic::R600_tex:
650 case AMDGPUIntrinsic::R600_texc:
651 case AMDGPUIntrinsic::R600_txl:
652 case AMDGPUIntrinsic::R600_txlc:
653 case AMDGPUIntrinsic::R600_txb:
654 case AMDGPUIntrinsic::R600_txbc:
655 case AMDGPUIntrinsic::R600_txf:
656 case AMDGPUIntrinsic::R600_txq:
657 case AMDGPUIntrinsic::R600_ddx:
Vincent Lejeune6df39432013-10-02 16:00:33 +0000658 case AMDGPUIntrinsic::R600_ddy:
659 case AMDGPUIntrinsic::R600_ldptr: {
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000660 unsigned TextureOp;
661 switch (IntrinsicID) {
662 case AMDGPUIntrinsic::R600_tex:
663 TextureOp = 0;
664 break;
665 case AMDGPUIntrinsic::R600_texc:
666 TextureOp = 1;
667 break;
668 case AMDGPUIntrinsic::R600_txl:
669 TextureOp = 2;
670 break;
671 case AMDGPUIntrinsic::R600_txlc:
672 TextureOp = 3;
673 break;
674 case AMDGPUIntrinsic::R600_txb:
675 TextureOp = 4;
676 break;
677 case AMDGPUIntrinsic::R600_txbc:
678 TextureOp = 5;
679 break;
680 case AMDGPUIntrinsic::R600_txf:
681 TextureOp = 6;
682 break;
683 case AMDGPUIntrinsic::R600_txq:
684 TextureOp = 7;
685 break;
686 case AMDGPUIntrinsic::R600_ddx:
687 TextureOp = 8;
688 break;
689 case AMDGPUIntrinsic::R600_ddy:
690 TextureOp = 9;
691 break;
Vincent Lejeune6df39432013-10-02 16:00:33 +0000692 case AMDGPUIntrinsic::R600_ldptr:
693 TextureOp = 10;
694 break;
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000695 default:
696 llvm_unreachable("Unknow Texture Operation");
697 }
698
699 SDValue TexArgs[19] = {
700 DAG.getConstant(TextureOp, MVT::i32),
701 Op.getOperand(1),
702 DAG.getConstant(0, MVT::i32),
703 DAG.getConstant(1, MVT::i32),
704 DAG.getConstant(2, MVT::i32),
705 DAG.getConstant(3, MVT::i32),
706 Op.getOperand(2),
707 Op.getOperand(3),
708 Op.getOperand(4),
709 DAG.getConstant(0, MVT::i32),
710 DAG.getConstant(1, MVT::i32),
711 DAG.getConstant(2, MVT::i32),
712 DAG.getConstant(3, MVT::i32),
713 Op.getOperand(5),
714 Op.getOperand(6),
715 Op.getOperand(7),
716 Op.getOperand(8),
717 Op.getOperand(9),
718 Op.getOperand(10)
719 };
Craig Topper48d114b2014-04-26 18:35:24 +0000720 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000721 }
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000722 case AMDGPUIntrinsic::AMDGPU_dp4: {
723 SDValue Args[8] = {
724 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
725 DAG.getConstant(0, MVT::i32)),
726 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
727 DAG.getConstant(0, MVT::i32)),
728 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
729 DAG.getConstant(1, MVT::i32)),
730 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
731 DAG.getConstant(1, MVT::i32)),
732 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
733 DAG.getConstant(2, MVT::i32)),
734 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
735 DAG.getConstant(2, MVT::i32)),
736 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
737 DAG.getConstant(3, MVT::i32)),
738 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
739 DAG.getConstant(3, MVT::i32))
740 };
Craig Topper48d114b2014-04-26 18:35:24 +0000741 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000742 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000743
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000744 case Intrinsic::r600_read_ngroups_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000745 return LowerImplicitParameter(DAG, VT, DL, 0);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000746 case Intrinsic::r600_read_ngroups_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000747 return LowerImplicitParameter(DAG, VT, DL, 1);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000748 case Intrinsic::r600_read_ngroups_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000749 return LowerImplicitParameter(DAG, VT, DL, 2);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000750 case Intrinsic::r600_read_global_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000751 return LowerImplicitParameter(DAG, VT, DL, 3);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000752 case Intrinsic::r600_read_global_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000753 return LowerImplicitParameter(DAG, VT, DL, 4);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000754 case Intrinsic::r600_read_global_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000755 return LowerImplicitParameter(DAG, VT, DL, 5);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000756 case Intrinsic::r600_read_local_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000757 return LowerImplicitParameter(DAG, VT, DL, 6);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000758 case Intrinsic::r600_read_local_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000759 return LowerImplicitParameter(DAG, VT, DL, 7);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000760 case Intrinsic::r600_read_local_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000761 return LowerImplicitParameter(DAG, VT, DL, 8);
762
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000763 case Intrinsic::r600_read_tgid_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000764 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
765 AMDGPU::T1_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000766 case Intrinsic::r600_read_tgid_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000767 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
768 AMDGPU::T1_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000769 case Intrinsic::r600_read_tgid_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000770 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
771 AMDGPU::T1_Z, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000772 case Intrinsic::r600_read_tidig_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000773 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
774 AMDGPU::T0_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000775 case Intrinsic::r600_read_tidig_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000776 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
777 AMDGPU::T0_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000778 case Intrinsic::r600_read_tidig_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000779 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
780 AMDGPU::T0_Z, VT);
781 }
782 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
783 break;
784 }
785 } // end switch(Op.getOpcode())
786 return SDValue();
787}
788
789void R600TargetLowering::ReplaceNodeResults(SDNode *N,
790 SmallVectorImpl<SDValue> &Results,
791 SelectionDAG &DAG) const {
792 switch (N->getOpcode()) {
Matt Arsenaultd125d742014-03-27 17:23:24 +0000793 default:
794 AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
795 return;
Tom Stellard75aadc22012-12-11 21:25:42 +0000796 case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
Tom Stellard365366f2013-01-23 02:09:06 +0000797 return;
798 case ISD::LOAD: {
799 SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
800 Results.push_back(SDValue(Node, 0));
801 Results.push_back(SDValue(Node, 1));
802 // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
803 // function
804 DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
805 return;
806 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000807 case ISD::STORE:
808 SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
809 Results.push_back(SDValue(Node, 0));
810 return;
Tom Stellard75aadc22012-12-11 21:25:42 +0000811 }
812}
813
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000814SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
815 // On hw >= R700, COS/SIN input must be between -1. and 1.
816 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
817 EVT VT = Op.getValueType();
818 SDValue Arg = Op.getOperand(0);
819 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
820 DAG.getNode(ISD::FADD, SDLoc(Op), VT,
821 DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
822 DAG.getConstantFP(0.15915494309, MVT::f32)),
823 DAG.getConstantFP(0.5, MVT::f32)));
824 unsigned TrigNode;
825 switch (Op.getOpcode()) {
826 case ISD::FCOS:
827 TrigNode = AMDGPUISD::COS_HW;
828 break;
829 case ISD::FSIN:
830 TrigNode = AMDGPUISD::SIN_HW;
831 break;
832 default:
833 llvm_unreachable("Wrong trig opcode");
834 }
835 SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
836 DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
837 DAG.getConstantFP(-0.5, MVT::f32)));
838 if (Gen >= AMDGPUSubtarget::R700)
839 return TrigVal;
840 // On R600 hw, COS/SIN input must be between -Pi and Pi.
841 return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
842 DAG.getConstantFP(3.14159265359, MVT::f32));
843}
844
Tom Stellard75aadc22012-12-11 21:25:42 +0000845SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
846 return DAG.getNode(
847 ISD::SETCC,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000848 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +0000849 MVT::i1,
850 Op, DAG.getConstantFP(0.0f, MVT::f32),
851 DAG.getCondCode(ISD::SETNE)
852 );
853}
854
Tom Stellard75aadc22012-12-11 21:25:42 +0000855SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000856 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +0000857 unsigned DwordOffset) const {
858 unsigned ByteOffset = DwordOffset * 4;
859 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +0000860 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +0000861
862 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
863 assert(isInt<16>(ByteOffset));
864
865 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
866 DAG.getConstant(ByteOffset, MVT::i32), // PTR
867 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
868 false, false, false, 0);
869}
870
Tom Stellard75aadc22012-12-11 21:25:42 +0000871bool R600TargetLowering::isZero(SDValue Op) const {
872 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
873 return Cst->isNullValue();
874 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
875 return CstFP->isZero();
876 } else {
877 return false;
878 }
879}
880
/// Lower SELECT_CC into a form the R600 hardware can match.
///
/// Two instruction families are available:
///   SET* - compares LHS/RHS and produces a hardware true/false value.
///   CND* - compares one operand against zero and selects True/False.
/// The operands and condition code are rearranged (swapped and/or inverted)
/// until one of those forms applies; failing that, the node is expanded into
/// two supported SELECT_CC operations.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  // If True/False are reversed relative to the hardware encoding, invert the
  // condition (or invert-and-swap) so a SET* pattern can still match.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
     ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        // Inverting the compare requires swapping the select values too.
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types.  This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    // CND* has no "not equal" form: rewrite (x != 0 ? a : b) as
    // (x == 0 ? b : a) by inverting the condition and swapping True/False.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }


  // Possible Min/Max pattern
  SDValue MinMax = LowerMinMax(Op, DAG);
  if (MinMax.getNode()) {
    return MinMax;
  }

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
1022
Alp Tokercb402912014-01-24 17:20:08 +00001023/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001024/// convert these pointers to a register index. Each register holds
1025/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
1026/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
1027/// for indirect addressing.
1028SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1029 unsigned StackWidth,
1030 SelectionDAG &DAG) const {
1031 unsigned SRLPad;
1032 switch(StackWidth) {
1033 case 1:
1034 SRLPad = 2;
1035 break;
1036 case 2:
1037 SRLPad = 3;
1038 break;
1039 case 4:
1040 SRLPad = 4;
1041 break;
1042 default: llvm_unreachable("Invalid stack width");
1043 }
1044
Andrew Trickef9de2a2013-05-25 02:42:55 +00001045 return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001046 DAG.getConstant(SRLPad, MVT::i32));
1047}
1048
1049void R600TargetLowering::getStackAddress(unsigned StackWidth,
1050 unsigned ElemIdx,
1051 unsigned &Channel,
1052 unsigned &PtrIncr) const {
1053 switch (StackWidth) {
1054 default:
1055 case 1:
1056 Channel = 0;
1057 if (ElemIdx > 0) {
1058 PtrIncr = 1;
1059 } else {
1060 PtrIncr = 0;
1061 }
1062 break;
1063 case 2:
1064 Channel = ElemIdx % 2;
1065 if (ElemIdx == 2) {
1066 PtrIncr = 1;
1067 } else {
1068 PtrIncr = 0;
1069 }
1070 break;
1071 case 4:
1072 Channel = ElemIdx;
1073 PtrIncr = 0;
1074 break;
1075 }
1076}
1077
Tom Stellard75aadc22012-12-11 21:25:42 +00001078SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001079 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001080 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1081 SDValue Chain = Op.getOperand(0);
1082 SDValue Value = Op.getOperand(1);
1083 SDValue Ptr = Op.getOperand(2);
1084
Tom Stellard2ffc3302013-08-26 15:05:44 +00001085 SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
Tom Stellardfbab8272013-08-16 01:12:11 +00001086 if (Result.getNode()) {
1087 return Result;
1088 }
1089
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001090 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
1091 if (StoreNode->isTruncatingStore()) {
1092 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001093 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001094 EVT MemVT = StoreNode->getMemoryVT();
1095 SDValue MaskConstant;
1096 if (MemVT == MVT::i8) {
1097 MaskConstant = DAG.getConstant(0xFF, MVT::i32);
1098 } else {
1099 assert(MemVT == MVT::i16);
1100 MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
1101 }
1102 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
1103 DAG.getConstant(2, MVT::i32));
1104 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
1105 DAG.getConstant(0x00000003, VT));
1106 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1107 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
1108 DAG.getConstant(3, VT));
1109 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1110 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
1111 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1112 // vector instead.
1113 SDValue Src[4] = {
1114 ShiftedValue,
1115 DAG.getConstant(0, MVT::i32),
1116 DAG.getConstant(0, MVT::i32),
1117 Mask
1118 };
Craig Topper48d114b2014-04-26 18:35:24 +00001119 SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001120 SDValue Args[3] = { Chain, Input, DWordAddr };
1121 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
Craig Topper206fcd42014-04-26 19:29:41 +00001122 Op->getVTList(), Args, MemVT,
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001123 StoreNode->getMemOperand());
1124 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
1125 Value.getValueType().bitsGE(MVT::i32)) {
1126 // Convert pointer from byte address to dword address.
1127 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1128 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
1129 Ptr, DAG.getConstant(2, MVT::i32)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001130
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001131 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
Matt Arsenaulteaa3a7e2013-12-10 21:37:42 +00001132 llvm_unreachable("Truncated and indexed stores not supported yet");
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001133 } else {
1134 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1135 }
1136 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001137 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001138 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001139
1140 EVT ValueVT = Value.getValueType();
1141
1142 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1143 return SDValue();
1144 }
1145
Tom Stellarde9373602014-01-22 19:24:14 +00001146 SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
1147 if (Ret.getNode()) {
1148 return Ret;
1149 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001150 // Lowering for indirect addressing
1151
1152 const MachineFunction &MF = DAG.getMachineFunction();
1153 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1154 getTargetMachine().getFrameLowering());
1155 unsigned StackWidth = TFL->getStackWidth(MF);
1156
1157 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1158
1159 if (ValueVT.isVector()) {
1160 unsigned NumElemVT = ValueVT.getVectorNumElements();
1161 EVT ElemVT = ValueVT.getVectorElementType();
Craig Topper48d114b2014-04-26 18:35:24 +00001162 SmallVector<SDValue, 4> Stores(NumElemVT);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001163
1164 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1165 "vector width in load");
1166
1167 for (unsigned i = 0; i < NumElemVT; ++i) {
1168 unsigned Channel, PtrIncr;
1169 getStackAddress(StackWidth, i, Channel, PtrIncr);
1170 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1171 DAG.getConstant(PtrIncr, MVT::i32));
1172 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
1173 Value, DAG.getConstant(i, MVT::i32));
1174
1175 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1176 Chain, Elem, Ptr,
1177 DAG.getTargetConstant(Channel, MVT::i32));
1178 }
Craig Topper48d114b2014-04-26 18:35:24 +00001179 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001180 } else {
1181 if (ValueVT == MVT::i8) {
1182 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1183 }
1184 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
NAKAMURA Takumi18ca09c2013-05-22 06:37:25 +00001185 DAG.getTargetConstant(0, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001186 }
1187
1188 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001189}
1190
Tom Stellard365366f2013-01-23 02:09:06 +00001191// return (512 + (kc_bank << 12)
1192static int
1193ConstantAddressBlock(unsigned AddressSpace) {
1194 switch (AddressSpace) {
1195 case AMDGPUAS::CONSTANT_BUFFER_0:
1196 return 512;
1197 case AMDGPUAS::CONSTANT_BUFFER_1:
1198 return 512 + 4096;
1199 case AMDGPUAS::CONSTANT_BUFFER_2:
1200 return 512 + 4096 * 2;
1201 case AMDGPUAS::CONSTANT_BUFFER_3:
1202 return 512 + 4096 * 3;
1203 case AMDGPUAS::CONSTANT_BUFFER_4:
1204 return 512 + 4096 * 4;
1205 case AMDGPUAS::CONSTANT_BUFFER_5:
1206 return 512 + 4096 * 5;
1207 case AMDGPUAS::CONSTANT_BUFFER_6:
1208 return 512 + 4096 * 6;
1209 case AMDGPUAS::CONSTANT_BUFFER_7:
1210 return 512 + 4096 * 7;
1211 case AMDGPUAS::CONSTANT_BUFFER_8:
1212 return 512 + 4096 * 8;
1213 case AMDGPUAS::CONSTANT_BUFFER_9:
1214 return 512 + 4096 * 9;
1215 case AMDGPUAS::CONSTANT_BUFFER_10:
1216 return 512 + 4096 * 10;
1217 case AMDGPUAS::CONSTANT_BUFFER_11:
1218 return 512 + 4096 * 11;
1219 case AMDGPUAS::CONSTANT_BUFFER_12:
1220 return 512 + 4096 * 12;
1221 case AMDGPUAS::CONSTANT_BUFFER_13:
1222 return 512 + 4096 * 13;
1223 case AMDGPUAS::CONSTANT_BUFFER_14:
1224 return 512 + 4096 * 14;
1225 case AMDGPUAS::CONSTANT_BUFFER_15:
1226 return 512 + 4096 * 15;
1227 default:
1228 return -1;
1229 }
1230}
1231
/// Custom lowering for loads.
///
/// Handles, in order: generic AMDGPU lowering, vector loads from local
/// memory (split in two), constant-buffer loads (folded to CONST_ADDRESS
/// nodes), sign-extending loads (expanded to extload + shl + sra), and
/// private-address loads (lowered to REGISTER_LOAD via indirect addressing).
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  // Give the generic AMDGPU lowering the first chance at this load.
  SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
  if (Ret.getNode()) {
    SDValue Ops[2] = {
      Ret,
      Chain
    };
    return DAG.getMergeValues(Ops, DL);
  }


  // Vector loads from local memory are split into two smaller loads.
  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      SplitVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
        isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT,
                           ArrayRef<SDValue>(Slots, NumElements));
    } else {
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    // Scalar loads only need element 0 of the fetched vector.
    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    // Sign-extend via shift-left then arithmetic-shift-right of the widened
    // any-extend load.
    SDValue ShiftAmount =
        DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                  LoadNode->getPointerInfo(), MemVT,
                                  LoadNode->isVolatile(),
                                  LoadNode->isNonTemporal(),
                                  LoadNode->getAlignment());
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);

    SDValue MergedValues[2] = { Sra, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  // Turn the byte pointer into a register index (see stackPtrToRegIndex).
  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    // Load each element with its own REGISTER_LOAD, padding unused lanes of
    // the 4-wide result with undef.
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, MVT::i32),
                             Op.getOperand(2));
    }
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2] = {
    LoweredLoad,
    Chain
  };

  return DAG.getMergeValues(Ops, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001380
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
///
/// Compute-shader arguments are loaded from the kernel input buffer
/// (constant buffer 0, after a 36-byte header); other shader types receive
/// their inputs in live-in vector registers.
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      SDLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned ShaderType = MF.getInfo<R600MachineFunctionInfo>()->ShaderType;

  SmallVector<ISD::InputArg, 8> LocalIns;

  // Recover the argument types as written in the original function signature
  // (before legalization split/promoted them).
  getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);

  AnalyzeFormalArguments(CCInfo, LocalIns);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    EVT VT = Ins[i].VT;
    EVT MemVT = LocalIns[i].VT;

    // Non-compute shaders receive their inputs in live-in registers.
    if (ShaderType != ShaderType::COMPUTE) {
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
    // being invalid. Somehow this seems to work with i64 arguments, but breaks
    // for <1 x i64>.

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.

    // FIXME: This should really check the extload type, but the handling of
    // extload vector parameters seems to be broken.
    //ISD::LoadExtType Ext = Ins[i].Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
    ISD::LoadExtType Ext = ISD::SEXTLOAD;
    SDValue Arg = DAG.getExtLoad(Ext, DL, VT, Chain,
                                 DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
                                 MachinePointerInfo(UndefValue::get(PtrTy)),
                                 MemVT, false, false, 4);

    // 4 is the preferred alignment for the CONSTANT memory space.
    InVals.push_back(Arg);
  }
  return Chain;
}
1440
Matt Arsenault758659232013-05-18 00:21:46 +00001441EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001442 if (!VT.isVector())
1443 return MVT::i32;
Tom Stellard75aadc22012-12-11 21:25:42 +00001444 return VT.changeVectorElementTypeToInteger();
1445}
1446
/// Canonicalize a 4-element BUILD_VECTOR for swizzle-capable consumers:
/// undef lanes are masked out, constant 0.0 / 1.0 lanes are encoded with the
/// hardware's inline SEL_0 / SEL_1 selects, and duplicate lanes are folded
/// onto the first occurrence. The per-lane replacement select is recorded in
/// \p RemapSwizzle, and the folded lanes are replaced with undef in the
/// returned BUILD_VECTOR.
static SDValue CompactSwizzlableVector(
  SelectionDAG &DAG, SDValue VectorEntry,
  DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      // We mask write here to teach later passes that the ith element of this
      // vector is undef. Thus we can use it to reduce 128 bits reg usage,
      // break false dependencies and additionally make assembly easier to read.
      RemapSwizzle[i] = 7; // SEL_MASK_WRITE
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    // Undef lanes (original or just created above) take no part in the
    // duplicate folding below.
    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      continue;
    // Fold a lane identical to an earlier one: point its swizzle at the
    // earlier lane and undef this one.
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec);
}
1489
Benjamin Kramer193960c2013-06-11 13:32:25 +00001490static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1491 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001492 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1493 assert(RemapSwizzle.empty());
1494 SDValue NewBldVec[4] = {
1495 VectorEntry.getOperand(0),
1496 VectorEntry.getOperand(1),
1497 VectorEntry.getOperand(2),
1498 VectorEntry.getOperand(3)
1499 };
1500 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001501 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001502 RemapSwizzle[i] = i;
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001503 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1504 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1505 ->getZExtValue();
1506 if (i == Idx)
1507 isUnmovable[Idx] = true;
1508 }
1509 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001510
1511 for (unsigned i = 0; i < 4; i++) {
1512 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1513 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1514 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001515 if (isUnmovable[Idx])
1516 continue;
1517 // Swap i and Idx
1518 std::swap(NewBldVec[Idx], NewBldVec[i]);
1519 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1520 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001521 }
1522 }
1523
1524 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001525 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001526}
1527
1528
1529SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1530SDValue Swz[4], SelectionDAG &DAG) const {
1531 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1532 // Old -> New swizzle values
1533 DenseMap<unsigned, unsigned> SwizzleRemap;
1534
1535 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1536 for (unsigned i = 0; i < 4; i++) {
1537 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1538 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1539 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1540 }
1541
1542 SwizzleRemap.clear();
1543 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1544 for (unsigned i = 0; i < 4; i++) {
1545 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1546 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1547 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1548 }
1549
1550 return BuildVector;
1551}
1552
1553
Tom Stellard75aadc22012-12-11 21:25:42 +00001554//===----------------------------------------------------------------------===//
1555// Custom DAG Optimizations
1556//===----------------------------------------------------------------------===//
1557
1558SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1559 DAGCombinerInfo &DCI) const {
1560 SelectionDAG &DAG = DCI.DAG;
1561
1562 switch (N->getOpcode()) {
Tom Stellard50122a52014-04-07 19:45:41 +00001563 default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00001564 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1565 case ISD::FP_ROUND: {
1566 SDValue Arg = N->getOperand(0);
1567 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001568 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001569 Arg.getOperand(0));
1570 }
1571 break;
1572 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001573
1574 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1575 // (i32 select_cc f32, f32, -1, 0 cc)
1576 //
1577 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1578 // this to one of the SET*_DX10 instructions.
1579 case ISD::FP_TO_SINT: {
1580 SDValue FNeg = N->getOperand(0);
1581 if (FNeg.getOpcode() != ISD::FNEG) {
1582 return SDValue();
1583 }
1584 SDValue SelectCC = FNeg.getOperand(0);
1585 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1586 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1587 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1588 !isHWTrueValue(SelectCC.getOperand(2)) ||
1589 !isHWFalseValue(SelectCC.getOperand(3))) {
1590 return SDValue();
1591 }
1592
Andrew Trickef9de2a2013-05-25 02:42:55 +00001593 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001594 SelectCC.getOperand(0), // LHS
1595 SelectCC.getOperand(1), // RHS
1596 DAG.getConstant(-1, MVT::i32), // True
1597 DAG.getConstant(0, MVT::i32), // Flase
1598 SelectCC.getOperand(4)); // CC
1599
1600 break;
1601 }
Quentin Colombete2e05482013-07-30 00:27:16 +00001602
NAKAMURA Takumi8a046432013-10-28 04:07:38 +00001603 // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1604 // => build_vector elt0, ... , NewEltIdx, ... , eltN
Quentin Colombete2e05482013-07-30 00:27:16 +00001605 case ISD::INSERT_VECTOR_ELT: {
1606 SDValue InVec = N->getOperand(0);
1607 SDValue InVal = N->getOperand(1);
1608 SDValue EltNo = N->getOperand(2);
1609 SDLoc dl(N);
1610
1611 // If the inserted element is an UNDEF, just use the input vector.
1612 if (InVal.getOpcode() == ISD::UNDEF)
1613 return InVec;
1614
1615 EVT VT = InVec.getValueType();
1616
1617 // If we can't generate a legal BUILD_VECTOR, exit
1618 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1619 return SDValue();
1620
1621 // Check that we know which element is being inserted
1622 if (!isa<ConstantSDNode>(EltNo))
1623 return SDValue();
1624 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1625
1626 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1627 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1628 // vector elements.
1629 SmallVector<SDValue, 8> Ops;
1630 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1631 Ops.append(InVec.getNode()->op_begin(),
1632 InVec.getNode()->op_end());
1633 } else if (InVec.getOpcode() == ISD::UNDEF) {
1634 unsigned NElts = VT.getVectorNumElements();
1635 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1636 } else {
1637 return SDValue();
1638 }
1639
1640 // Insert the element
1641 if (Elt < Ops.size()) {
1642 // All the operands of BUILD_VECTOR must have the same type;
1643 // we enforce that here.
1644 EVT OpVT = Ops[0].getValueType();
1645 if (InVal.getValueType() != OpVT)
1646 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1647 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
1648 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
1649 Ops[Elt] = InVal;
1650 }
1651
1652 // Return the new vector
Craig Topper48d114b2014-04-26 18:35:24 +00001653 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
Quentin Colombete2e05482013-07-30 00:27:16 +00001654 }
1655
Tom Stellard365366f2013-01-23 02:09:06 +00001656 // Extract_vec (Build_vector) generated by custom lowering
1657 // also needs to be customly combined
1658 case ISD::EXTRACT_VECTOR_ELT: {
1659 SDValue Arg = N->getOperand(0);
1660 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1661 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1662 unsigned Element = Const->getZExtValue();
1663 return Arg->getOperand(Element);
1664 }
1665 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001666 if (Arg.getOpcode() == ISD::BITCAST &&
1667 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1668 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1669 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001670 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00001671 Arg->getOperand(0).getOperand(Element));
1672 }
1673 }
Tom Stellard365366f2013-01-23 02:09:06 +00001674 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001675
1676 case ISD::SELECT_CC: {
1677 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1678 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001679 //
1680 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1681 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001682 SDValue LHS = N->getOperand(0);
1683 if (LHS.getOpcode() != ISD::SELECT_CC) {
1684 return SDValue();
1685 }
1686
1687 SDValue RHS = N->getOperand(1);
1688 SDValue True = N->getOperand(2);
1689 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001690 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001691
1692 if (LHS.getOperand(2).getNode() != True.getNode() ||
1693 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00001694 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00001695 return SDValue();
1696 }
1697
Tom Stellard5e524892013-03-08 15:37:11 +00001698 switch (NCC) {
1699 default: return SDValue();
1700 case ISD::SETNE: return LHS;
1701 case ISD::SETEQ: {
1702 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1703 LHSCC = ISD::getSetCCInverse(LHSCC,
1704 LHS.getOperand(0).getValueType().isInteger());
Tom Stellardcd428182013-09-28 02:50:38 +00001705 if (DCI.isBeforeLegalizeOps() ||
1706 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1707 return DAG.getSelectCC(SDLoc(N),
1708 LHS.getOperand(0),
1709 LHS.getOperand(1),
1710 LHS.getOperand(2),
1711 LHS.getOperand(3),
1712 LHSCC);
1713 break;
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001714 }
Tom Stellard5e524892013-03-08 15:37:11 +00001715 }
Tom Stellardcd428182013-09-28 02:50:38 +00001716 return SDValue();
Tom Stellard5e524892013-03-08 15:37:11 +00001717 }
Tom Stellardfbab8272013-08-16 01:12:11 +00001718
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001719 case AMDGPUISD::EXPORT: {
1720 SDValue Arg = N->getOperand(1);
1721 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1722 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001723
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001724 SDValue NewArgs[8] = {
1725 N->getOperand(0), // Chain
1726 SDValue(),
1727 N->getOperand(2), // ArrayBase
1728 N->getOperand(3), // Type
1729 N->getOperand(4), // SWZ_X
1730 N->getOperand(5), // SWZ_Y
1731 N->getOperand(6), // SWZ_Z
1732 N->getOperand(7) // SWZ_W
1733 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00001734 SDLoc DL(N);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001735 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
Craig Topper48d114b2014-04-26 18:35:24 +00001736 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
Tom Stellarde06163a2013-02-07 14:02:35 +00001737 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001738 case AMDGPUISD::TEXTURE_FETCH: {
1739 SDValue Arg = N->getOperand(1);
1740 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1741 break;
1742
1743 SDValue NewArgs[19] = {
1744 N->getOperand(0),
1745 N->getOperand(1),
1746 N->getOperand(2),
1747 N->getOperand(3),
1748 N->getOperand(4),
1749 N->getOperand(5),
1750 N->getOperand(6),
1751 N->getOperand(7),
1752 N->getOperand(8),
1753 N->getOperand(9),
1754 N->getOperand(10),
1755 N->getOperand(11),
1756 N->getOperand(12),
1757 N->getOperand(13),
1758 N->getOperand(14),
1759 N->getOperand(15),
1760 N->getOperand(16),
1761 N->getOperand(17),
1762 N->getOperand(18),
1763 };
1764 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
1765 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
Craig Topper48d114b2014-04-26 18:35:24 +00001766 NewArgs);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001767 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001768 }
1769 return SDValue();
1770}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001771
1772static bool
1773FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001774 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001775 const R600InstrInfo *TII =
1776 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
1777 if (!Src.isMachineOpcode())
1778 return false;
1779 switch (Src.getMachineOpcode()) {
1780 case AMDGPU::FNEG_R600:
1781 if (!Neg.getNode())
1782 return false;
1783 Src = Src.getOperand(0);
1784 Neg = DAG.getTargetConstant(1, MVT::i32);
1785 return true;
1786 case AMDGPU::FABS_R600:
1787 if (!Abs.getNode())
1788 return false;
1789 Src = Src.getOperand(0);
1790 Abs = DAG.getTargetConstant(1, MVT::i32);
1791 return true;
1792 case AMDGPU::CONST_COPY: {
1793 unsigned Opcode = ParentNode->getMachineOpcode();
1794 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1795
1796 if (!Sel.getNode())
1797 return false;
1798
1799 SDValue CstOffset = Src.getOperand(0);
1800 if (ParentNode->getValueType(0).isVector())
1801 return false;
1802
1803 // Gather constants values
1804 int SrcIndices[] = {
1805 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
1806 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
1807 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
1808 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1809 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1810 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1811 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
1812 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
1813 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
1814 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
1815 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
1816 };
1817 std::vector<unsigned> Consts;
1818 for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
1819 int OtherSrcIdx = SrcIndices[i];
1820 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
1821 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
1822 continue;
1823 if (HasDst) {
1824 OtherSrcIdx--;
1825 OtherSelIdx--;
1826 }
1827 if (RegisterSDNode *Reg =
1828 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
1829 if (Reg->getReg() == AMDGPU::ALU_CONST) {
1830 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(
1831 ParentNode->getOperand(OtherSelIdx));
1832 Consts.push_back(Cst->getZExtValue());
1833 }
1834 }
1835 }
1836
1837 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
1838 Consts.push_back(Cst->getZExtValue());
1839 if (!TII->fitsConstReadLimitations(Consts)) {
1840 return false;
1841 }
1842
1843 Sel = CstOffset;
1844 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
1845 return true;
1846 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001847 case AMDGPU::MOV_IMM_I32:
1848 case AMDGPU::MOV_IMM_F32: {
1849 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
1850 uint64_t ImmValue = 0;
1851
1852
1853 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
1854 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
1855 float FloatValue = FPC->getValueAPF().convertToFloat();
1856 if (FloatValue == 0.0) {
1857 ImmReg = AMDGPU::ZERO;
1858 } else if (FloatValue == 0.5) {
1859 ImmReg = AMDGPU::HALF;
1860 } else if (FloatValue == 1.0) {
1861 ImmReg = AMDGPU::ONE;
1862 } else {
1863 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
1864 }
1865 } else {
1866 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
1867 uint64_t Value = C->getZExtValue();
1868 if (Value == 0) {
1869 ImmReg = AMDGPU::ZERO;
1870 } else if (Value == 1) {
1871 ImmReg = AMDGPU::ONE_INT;
1872 } else {
1873 ImmValue = Value;
1874 }
1875 }
1876
1877 // Check that we aren't already using an immediate.
1878 // XXX: It's possible for an instruction to have more than one
1879 // immediate operand, but this is not supported yet.
1880 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
1881 if (!Imm.getNode())
1882 return false;
1883 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
1884 assert(C);
1885 if (C->getZExtValue())
1886 return false;
1887 Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
1888 }
1889 Src = DAG.getRegister(ImmReg, MVT::i32);
1890 return true;
1891 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001892 default:
1893 return false;
1894 }
1895}
1896
1897
/// \brief Fold the instructions after selecting them.
///
/// Walks the operands of the selected machine node and lets FoldOperand try
/// to absorb FNEG/FABS/CONST_COPY/MOV_IMM_* sources into modifier bits,
/// constant selects, or the literal slot.  DOT_4, REG_SEQUENCE and CLAMP_R600
/// need special operand layouts and are handled separately.
SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
                                            SelectionDAG &DAG) const {
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
  if (!Node->isMachineOpcode())
    return Node;
  unsigned Opcode = Node->getMachineOpcode();
  // Node-less placeholder passed for modifiers an instruction lacks;
  // FoldOperand treats it as "modifier unavailable".
  SDValue FakeOp;

  // Mutable copy of the node's operands; FoldOperand rewrites entries in
  // place and, on success, the node is rebuilt from this vector.
  std::vector<SDValue> Ops;
  for(SDNode::op_iterator I = Node->op_begin(), E = Node->op_end();
      I != E; ++I)
    Ops.push_back(*I);

  if (Opcode == AMDGPU::DOT_4) {
    // DOT_4 has per-channel src0/src1 operands, each with its own neg/abs.
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
    };
    for (unsigned i = 0; i < 8; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      // MachineInstr operand indices include the dst operand, SDNode
      // operands do not — hence the "- 1" when indexing Ops.
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue &Abs = Ops[AbsIdx[i] - 1];
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      if (HasDst)
        SelIdx--;
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      // DOT_4 has no literal slot, so pass FakeOp for Imm.
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::REG_SEQUENCE) {
    // REG_SEQUENCE operands alternate (value, subreg index); only the value
    // operands (odd positions) are candidates for folding.
    for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
      SDValue &Src = Ops[i];
      if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::CLAMP_R600) {
    // Fold CLAMP_R600 into the clamp modifier bit of its (single) source
    // instruction, when that instruction supports modifiers.
    SDValue Src = Node->getOperand(0);
    if (!Src.isMachineOpcode() ||
        !TII->hasInstrModifiers(Src.getMachineOpcode()))
      return Node;
    int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
        AMDGPU::OpName::clamp);
    if (ClampIdx < 0)
      return Node;
    // NOTE: this local Ops intentionally shadows the outer Ops — here we
    // rebuild the *source* node with its clamp bit set, not this node.
    std::vector<SDValue> Ops;
    unsigned NumOp = Src.getNumOperands();
    for(unsigned i = 0; i < NumOp; ++i)
      Ops.push_back(Src.getOperand(i));
    Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
    return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
        Node->getVTList(), Ops);
  } else {
    // Generic ALU instruction with up to three sources.
    if (!TII->hasInstrModifiers(Opcode))
      return Node;
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
      -1  // src2 never has an abs modifier
    };
    for (unsigned i = 0; i < 3; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue FakeAbs;
      SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
      if (HasDst) {
        // Skip the dst operand, which SDNode operand lists omit.
        SelIdx--;
        ImmIdx--;
      }
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      SDValue &Imm = Ops[ImmIdx];
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  }

  return Node;
}