blob: b40cb6724d63f32d521611818779730e9fb07058 [file] [log] [blame]
Tom Stellard75aadc22012-12-11 21:25:42 +00001//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Custom DAG lowering for R600
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
16#include "R600Defines.h"
17#include "R600InstrInfo.h"
18#include "R600MachineFunctionInfo.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000019#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000020#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000021#include "llvm/CodeGen/MachineInstrBuilder.h"
22#include "llvm/CodeGen/MachineRegisterInfo.h"
23#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000024#include "llvm/IR/Argument.h"
25#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000026
27using namespace llvm;
28
29R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000030 AMDGPUTargetLowering(TM),
31 Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000032 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
33 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
34 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
35 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000036 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
37 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
38
Tom Stellard75aadc22012-12-11 21:25:42 +000039 computeRegisterProperties();
40
Tom Stellard0351ea22013-09-28 02:50:50 +000041 // Set condition code actions
42 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
43 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000044 setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000045 setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000046 setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
47 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000048 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
49 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
50 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
51 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000052 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
53 setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
54
55 setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
56 setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
57 setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
58 setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
59
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000060 setOperationAction(ISD::FCOS, MVT::f32, Custom);
61 setOperationAction(ISD::FSIN, MVT::f32, Custom);
62
Tom Stellard75aadc22012-12-11 21:25:42 +000063 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000064 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000065
Tom Stellard492ebea2013-03-08 15:37:07 +000066 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
67 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000068
69 setOperationAction(ISD::FSUB, MVT::f32, Expand);
70
71 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
72 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
73 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000074
Tom Stellard75aadc22012-12-11 21:25:42 +000075 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
76 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
77
Tom Stellarde8f9f282013-03-08 15:37:05 +000078 setOperationAction(ISD::SETCC, MVT::i32, Expand);
79 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000080 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
81
Tom Stellard53f2f902013-09-05 18:38:03 +000082 setOperationAction(ISD::SELECT, MVT::i32, Expand);
83 setOperationAction(ISD::SELECT, MVT::f32, Expand);
84 setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
Tom Stellard53f2f902013-09-05 18:38:03 +000085 setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000086
Matt Arsenault4e466652014-04-16 01:41:30 +000087 // Expand sign extension of vectors
88 if (!Subtarget->hasBFE())
89 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
90
91 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
92 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);
93
94 if (!Subtarget->hasBFE())
95 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
96 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
97 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);
98
99 if (!Subtarget->hasBFE())
100 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
101 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
102 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);
103
104 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
105 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
106 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);
107
108 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
109
110
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000111 // Legalize loads and stores to the private address space.
112 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000113 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000114 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000115
116 // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
117 // spaces, so it is custom lowered to handle those where it isn't.
Tom Stellard1e803092013-07-23 01:48:18 +0000118 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
119 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
120 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
121 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000122 setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
123 setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
124
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000125 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000126 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000127 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000128 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
Tom Stellardd3ee8c12013-08-16 01:12:06 +0000129 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
130 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000131
Tom Stellard365366f2013-01-23 02:09:06 +0000132 setOperationAction(ISD::LOAD, MVT::i32, Custom);
133 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000134 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
135
Tom Stellard75aadc22012-12-11 21:25:42 +0000136 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +0000137 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +0000138 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +0000139 setTargetDAGCombine(ISD::SELECT_CC);
Quentin Colombete2e05482013-07-30 00:27:16 +0000140 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000141
Tom Stellard5f337882014-04-29 23:12:43 +0000142 // These should be replaced by UDVIREM, but it does not happen automatically
143 // during Type Legalization
144 setOperationAction(ISD::UDIV, MVT::i64, Custom);
145 setOperationAction(ISD::UREM, MVT::i64, Custom);
146
Michel Danzer49812b52013-07-10 16:37:07 +0000147 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
148
Tom Stellardb852af52013-03-08 15:37:03 +0000149 setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard87047f62013-04-24 23:56:18 +0000150 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
Tom Stellardfc455472013-08-12 22:33:21 +0000151 setSchedulingPreference(Sched::Source);
Tom Stellard75aadc22012-12-11 21:25:42 +0000152}
153
154MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
155 MachineInstr * MI, MachineBasicBlock * BB) const {
156 MachineFunction * MF = BB->getParent();
157 MachineRegisterInfo &MRI = MF->getRegInfo();
158 MachineBasicBlock::iterator I = *MI;
Bill Wendling37e9adb2013-06-07 20:28:55 +0000159 const R600InstrInfo *TII =
160 static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());
Tom Stellard75aadc22012-12-11 21:25:42 +0000161
162 switch (MI->getOpcode()) {
Tom Stellardc6f4a292013-08-26 15:05:59 +0000163 default:
Tom Stellard8f9fc202013-11-15 00:12:45 +0000164 // Replace LDS_*_RET instruction that don't have any uses with the
165 // equivalent LDS_*_NORET instruction.
166 if (TII->isLDSRetInstr(MI->getOpcode())) {
Tom Stellard13c68ef2013-09-05 18:38:09 +0000167 int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
168 assert(DstIdx != -1);
169 MachineInstrBuilder NewMI;
Tom Stellard8f9fc202013-11-15 00:12:45 +0000170 if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()))
171 return BB;
172
173 NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
174 TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
Tom Stellardc6f4a292013-08-26 15:05:59 +0000175 for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
176 NewMI.addOperand(MI->getOperand(i));
177 }
Tom Stellardc6f4a292013-08-26 15:05:59 +0000178 } else {
179 return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
180 }
181 break;
Tom Stellard75aadc22012-12-11 21:25:42 +0000182 case AMDGPU::CLAMP_R600: {
183 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
184 AMDGPU::MOV,
185 MI->getOperand(0).getReg(),
186 MI->getOperand(1).getReg());
187 TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
188 break;
189 }
190
191 case AMDGPU::FABS_R600: {
192 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
193 AMDGPU::MOV,
194 MI->getOperand(0).getReg(),
195 MI->getOperand(1).getReg());
196 TII->addFlag(NewMI, 0, MO_FLAG_ABS);
197 break;
198 }
199
200 case AMDGPU::FNEG_R600: {
201 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
202 AMDGPU::MOV,
203 MI->getOperand(0).getReg(),
204 MI->getOperand(1).getReg());
205 TII->addFlag(NewMI, 0, MO_FLAG_NEG);
206 break;
207 }
208
Tom Stellard75aadc22012-12-11 21:25:42 +0000209 case AMDGPU::MASK_WRITE: {
210 unsigned maskedRegister = MI->getOperand(0).getReg();
211 assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
212 MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
213 TII->addFlag(defInstr, 0, MO_FLAG_MASK);
214 break;
215 }
216
217 case AMDGPU::MOV_IMM_F32:
218 TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
219 MI->getOperand(1).getFPImm()->getValueAPF()
220 .bitcastToAPInt().getZExtValue());
221 break;
222 case AMDGPU::MOV_IMM_I32:
223 TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
224 MI->getOperand(1).getImm());
225 break;
Vincent Lejeune0b72f102013-03-05 15:04:55 +0000226 case AMDGPU::CONST_COPY: {
227 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
228 MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
Tom Stellard02661d92013-06-25 21:22:18 +0000229 TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
Vincent Lejeune0b72f102013-03-05 15:04:55 +0000230 MI->getOperand(1).getImm());
231 break;
232 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000233
234 case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
Tom Stellard0344cdf2013-08-01 15:23:42 +0000235 case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
Tom Stellard75aadc22012-12-11 21:25:42 +0000236 case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +0000237 unsigned EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
Tom Stellard75aadc22012-12-11 21:25:42 +0000238
239 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
240 .addOperand(MI->getOperand(0))
241 .addOperand(MI->getOperand(1))
242 .addImm(EOP); // Set End of program bit
243 break;
244 }
245
Tom Stellard75aadc22012-12-11 21:25:42 +0000246 case AMDGPU::TXD: {
247 unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
248 unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000249 MachineOperand &RID = MI->getOperand(4);
250 MachineOperand &SID = MI->getOperand(5);
251 unsigned TextureId = MI->getOperand(6).getImm();
252 unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
253 unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
Tom Stellard75aadc22012-12-11 21:25:42 +0000254
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000255 switch (TextureId) {
256 case 5: // Rect
257 CTX = CTY = 0;
258 break;
259 case 6: // Shadow1D
260 SrcW = SrcZ;
261 break;
262 case 7: // Shadow2D
263 SrcW = SrcZ;
264 break;
265 case 8: // ShadowRect
266 CTX = CTY = 0;
267 SrcW = SrcZ;
268 break;
269 case 9: // 1DArray
270 SrcZ = SrcY;
271 CTZ = 0;
272 break;
273 case 10: // 2DArray
274 CTZ = 0;
275 break;
276 case 11: // Shadow1DArray
277 SrcZ = SrcY;
278 CTZ = 0;
279 break;
280 case 12: // Shadow2DArray
281 CTZ = 0;
282 break;
283 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000284 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
285 .addOperand(MI->getOperand(3))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000286 .addImm(SrcX)
287 .addImm(SrcY)
288 .addImm(SrcZ)
289 .addImm(SrcW)
290 .addImm(0)
291 .addImm(0)
292 .addImm(0)
293 .addImm(0)
294 .addImm(1)
295 .addImm(2)
296 .addImm(3)
297 .addOperand(RID)
298 .addOperand(SID)
299 .addImm(CTX)
300 .addImm(CTY)
301 .addImm(CTZ)
302 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000303 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
304 .addOperand(MI->getOperand(2))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000305 .addImm(SrcX)
306 .addImm(SrcY)
307 .addImm(SrcZ)
308 .addImm(SrcW)
309 .addImm(0)
310 .addImm(0)
311 .addImm(0)
312 .addImm(0)
313 .addImm(1)
314 .addImm(2)
315 .addImm(3)
316 .addOperand(RID)
317 .addOperand(SID)
318 .addImm(CTX)
319 .addImm(CTY)
320 .addImm(CTZ)
321 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000322 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
323 .addOperand(MI->getOperand(0))
324 .addOperand(MI->getOperand(1))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000325 .addImm(SrcX)
326 .addImm(SrcY)
327 .addImm(SrcZ)
328 .addImm(SrcW)
329 .addImm(0)
330 .addImm(0)
331 .addImm(0)
332 .addImm(0)
333 .addImm(1)
334 .addImm(2)
335 .addImm(3)
336 .addOperand(RID)
337 .addOperand(SID)
338 .addImm(CTX)
339 .addImm(CTY)
340 .addImm(CTZ)
341 .addImm(CTW)
Tom Stellard75aadc22012-12-11 21:25:42 +0000342 .addReg(T0, RegState::Implicit)
343 .addReg(T1, RegState::Implicit);
344 break;
345 }
346
347 case AMDGPU::TXD_SHADOW: {
348 unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
349 unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000350 MachineOperand &RID = MI->getOperand(4);
351 MachineOperand &SID = MI->getOperand(5);
352 unsigned TextureId = MI->getOperand(6).getImm();
353 unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
354 unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
355
356 switch (TextureId) {
357 case 5: // Rect
358 CTX = CTY = 0;
359 break;
360 case 6: // Shadow1D
361 SrcW = SrcZ;
362 break;
363 case 7: // Shadow2D
364 SrcW = SrcZ;
365 break;
366 case 8: // ShadowRect
367 CTX = CTY = 0;
368 SrcW = SrcZ;
369 break;
370 case 9: // 1DArray
371 SrcZ = SrcY;
372 CTZ = 0;
373 break;
374 case 10: // 2DArray
375 CTZ = 0;
376 break;
377 case 11: // Shadow1DArray
378 SrcZ = SrcY;
379 CTZ = 0;
380 break;
381 case 12: // Shadow2DArray
382 CTZ = 0;
383 break;
384 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000385
386 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
387 .addOperand(MI->getOperand(3))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000388 .addImm(SrcX)
389 .addImm(SrcY)
390 .addImm(SrcZ)
391 .addImm(SrcW)
392 .addImm(0)
393 .addImm(0)
394 .addImm(0)
395 .addImm(0)
396 .addImm(1)
397 .addImm(2)
398 .addImm(3)
399 .addOperand(RID)
400 .addOperand(SID)
401 .addImm(CTX)
402 .addImm(CTY)
403 .addImm(CTZ)
404 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000405 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
406 .addOperand(MI->getOperand(2))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000407 .addImm(SrcX)
408 .addImm(SrcY)
409 .addImm(SrcZ)
410 .addImm(SrcW)
411 .addImm(0)
412 .addImm(0)
413 .addImm(0)
414 .addImm(0)
415 .addImm(1)
416 .addImm(2)
417 .addImm(3)
418 .addOperand(RID)
419 .addOperand(SID)
420 .addImm(CTX)
421 .addImm(CTY)
422 .addImm(CTZ)
423 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000424 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
425 .addOperand(MI->getOperand(0))
426 .addOperand(MI->getOperand(1))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000427 .addImm(SrcX)
428 .addImm(SrcY)
429 .addImm(SrcZ)
430 .addImm(SrcW)
431 .addImm(0)
432 .addImm(0)
433 .addImm(0)
434 .addImm(0)
435 .addImm(1)
436 .addImm(2)
437 .addImm(3)
438 .addOperand(RID)
439 .addOperand(SID)
440 .addImm(CTX)
441 .addImm(CTY)
442 .addImm(CTZ)
443 .addImm(CTW)
Tom Stellard75aadc22012-12-11 21:25:42 +0000444 .addReg(T0, RegState::Implicit)
445 .addReg(T1, RegState::Implicit);
446 break;
447 }
448
449 case AMDGPU::BRANCH:
450 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000451 .addOperand(MI->getOperand(0));
Tom Stellard75aadc22012-12-11 21:25:42 +0000452 break;
453
454 case AMDGPU::BRANCH_COND_f32: {
455 MachineInstr *NewMI =
456 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
457 AMDGPU::PREDICATE_BIT)
458 .addOperand(MI->getOperand(1))
459 .addImm(OPCODE_IS_NOT_ZERO)
460 .addImm(0); // Flags
461 TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000462 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
Tom Stellard75aadc22012-12-11 21:25:42 +0000463 .addOperand(MI->getOperand(0))
464 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
465 break;
466 }
467
468 case AMDGPU::BRANCH_COND_i32: {
469 MachineInstr *NewMI =
470 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
471 AMDGPU::PREDICATE_BIT)
472 .addOperand(MI->getOperand(1))
473 .addImm(OPCODE_IS_NOT_ZERO_INT)
474 .addImm(0); // Flags
475 TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000476 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
Tom Stellard75aadc22012-12-11 21:25:42 +0000477 .addOperand(MI->getOperand(0))
478 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
479 break;
480 }
481
Tom Stellard75aadc22012-12-11 21:25:42 +0000482 case AMDGPU::EG_ExportSwz:
483 case AMDGPU::R600_ExportSwz: {
Tom Stellard6f1b8652013-01-23 21:39:49 +0000484 // Instruction is left unmodified if its not the last one of its type
485 bool isLastInstructionOfItsType = true;
486 unsigned InstExportType = MI->getOperand(1).getImm();
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +0000487 for (MachineBasicBlock::iterator NextExportInst = std::next(I),
Tom Stellard6f1b8652013-01-23 21:39:49 +0000488 EndBlock = BB->end(); NextExportInst != EndBlock;
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +0000489 NextExportInst = std::next(NextExportInst)) {
Tom Stellard6f1b8652013-01-23 21:39:49 +0000490 if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
491 NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
492 unsigned CurrentInstExportType = NextExportInst->getOperand(1)
493 .getImm();
494 if (CurrentInstExportType == InstExportType) {
495 isLastInstructionOfItsType = false;
496 break;
497 }
498 }
499 }
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +0000500 bool EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
Tom Stellard6f1b8652013-01-23 21:39:49 +0000501 if (!EOP && !isLastInstructionOfItsType)
Tom Stellard75aadc22012-12-11 21:25:42 +0000502 return BB;
503 unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
504 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
505 .addOperand(MI->getOperand(0))
506 .addOperand(MI->getOperand(1))
507 .addOperand(MI->getOperand(2))
508 .addOperand(MI->getOperand(3))
509 .addOperand(MI->getOperand(4))
510 .addOperand(MI->getOperand(5))
511 .addOperand(MI->getOperand(6))
512 .addImm(CfInst)
Tom Stellard6f1b8652013-01-23 21:39:49 +0000513 .addImm(EOP);
Tom Stellard75aadc22012-12-11 21:25:42 +0000514 break;
515 }
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000516 case AMDGPU::RETURN: {
517 // RETURN instructions must have the live-out registers as implicit uses,
518 // otherwise they appear dead.
519 R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
520 MachineInstrBuilder MIB(*MF, MI);
521 for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
522 MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
523 return BB;
524 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000525 }
526
527 MI->eraseFromParent();
528 return BB;
529}
530
531//===----------------------------------------------------------------------===//
532// Custom DAG Lowering Operations
533//===----------------------------------------------------------------------===//
534
Tom Stellard75aadc22012-12-11 21:25:42 +0000535SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000536 MachineFunction &MF = DAG.getMachineFunction();
537 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard75aadc22012-12-11 21:25:42 +0000538 switch (Op.getOpcode()) {
539 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000540 case ISD::FCOS:
541 case ISD::FSIN: return LowerTrig(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000542 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000543 case ISD::STORE: return LowerSTORE(Op, DAG);
Tom Stellard365366f2013-01-23 02:09:06 +0000544 case ISD::LOAD: return LowerLOAD(Op, DAG);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000545 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000546 case ISD::INTRINSIC_VOID: {
547 SDValue Chain = Op.getOperand(0);
548 unsigned IntrinsicID =
549 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
550 switch (IntrinsicID) {
551 case AMDGPUIntrinsic::AMDGPU_store_output: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000552 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
553 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000554 MFI->LiveOuts.push_back(Reg);
Andrew Trickef9de2a2013-05-25 02:42:55 +0000555 return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
Tom Stellard75aadc22012-12-11 21:25:42 +0000556 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000557 case AMDGPUIntrinsic::R600_store_swizzle: {
558 const SDValue Args[8] = {
559 Chain,
560 Op.getOperand(2), // Export Value
561 Op.getOperand(3), // ArrayBase
562 Op.getOperand(4), // Type
563 DAG.getConstant(0, MVT::i32), // SWZ_X
564 DAG.getConstant(1, MVT::i32), // SWZ_Y
565 DAG.getConstant(2, MVT::i32), // SWZ_Z
566 DAG.getConstant(3, MVT::i32) // SWZ_W
567 };
Craig Topper48d114b2014-04-26 18:35:24 +0000568 return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(), Args);
Tom Stellard75aadc22012-12-11 21:25:42 +0000569 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000570
Tom Stellard75aadc22012-12-11 21:25:42 +0000571 // default for switch(IntrinsicID)
572 default: break;
573 }
574 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
575 break;
576 }
577 case ISD::INTRINSIC_WO_CHAIN: {
578 unsigned IntrinsicID =
579 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
580 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000581 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000582 switch(IntrinsicID) {
583 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000584 case AMDGPUIntrinsic::R600_load_input: {
585 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
586 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
587 MachineFunction &MF = DAG.getMachineFunction();
588 MachineRegisterInfo &MRI = MF.getRegInfo();
589 MRI.addLiveIn(Reg);
590 return DAG.getCopyFromReg(DAG.getEntryNode(),
591 SDLoc(DAG.getEntryNode()), Reg, VT);
592 }
593
594 case AMDGPUIntrinsic::R600_interp_input: {
595 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
596 int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
597 MachineSDNode *interp;
598 if (ijb < 0) {
599 const MachineFunction &MF = DAG.getMachineFunction();
600 const R600InstrInfo *TII =
601 static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
602 interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
603 MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
604 return DAG.getTargetExtractSubreg(
605 TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
606 DL, MVT::f32, SDValue(interp, 0));
607 }
608 MachineFunction &MF = DAG.getMachineFunction();
609 MachineRegisterInfo &MRI = MF.getRegInfo();
610 unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
611 unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
612 MRI.addLiveIn(RegisterI);
613 MRI.addLiveIn(RegisterJ);
614 SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
615 SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
616 SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
617 SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);
618
619 if (slot % 4 < 2)
620 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
621 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
622 RegisterJNode, RegisterINode);
623 else
624 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
625 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
626 RegisterJNode, RegisterINode);
627 return SDValue(interp, slot % 2);
628 }
Vincent Lejeunef143af32013-11-11 22:10:24 +0000629 case AMDGPUIntrinsic::R600_interp_xy:
630 case AMDGPUIntrinsic::R600_interp_zw: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000631 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Tom Stellard41afe6a2013-02-05 17:09:14 +0000632 MachineSDNode *interp;
Vincent Lejeunef143af32013-11-11 22:10:24 +0000633 SDValue RegisterINode = Op.getOperand(2);
634 SDValue RegisterJNode = Op.getOperand(3);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000635
Vincent Lejeunef143af32013-11-11 22:10:24 +0000636 if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
Tom Stellard41afe6a2013-02-05 17:09:14 +0000637 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
Vincent Lejeunef143af32013-11-11 22:10:24 +0000638 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000639 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000640 else
641 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
Vincent Lejeunef143af32013-11-11 22:10:24 +0000642 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000643 RegisterJNode, RegisterINode);
Vincent Lejeunef143af32013-11-11 22:10:24 +0000644 return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
645 SDValue(interp, 0), SDValue(interp, 1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000646 }
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000647 case AMDGPUIntrinsic::R600_tex:
648 case AMDGPUIntrinsic::R600_texc:
649 case AMDGPUIntrinsic::R600_txl:
650 case AMDGPUIntrinsic::R600_txlc:
651 case AMDGPUIntrinsic::R600_txb:
652 case AMDGPUIntrinsic::R600_txbc:
653 case AMDGPUIntrinsic::R600_txf:
654 case AMDGPUIntrinsic::R600_txq:
655 case AMDGPUIntrinsic::R600_ddx:
Vincent Lejeune6df39432013-10-02 16:00:33 +0000656 case AMDGPUIntrinsic::R600_ddy:
657 case AMDGPUIntrinsic::R600_ldptr: {
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000658 unsigned TextureOp;
659 switch (IntrinsicID) {
660 case AMDGPUIntrinsic::R600_tex:
661 TextureOp = 0;
662 break;
663 case AMDGPUIntrinsic::R600_texc:
664 TextureOp = 1;
665 break;
666 case AMDGPUIntrinsic::R600_txl:
667 TextureOp = 2;
668 break;
669 case AMDGPUIntrinsic::R600_txlc:
670 TextureOp = 3;
671 break;
672 case AMDGPUIntrinsic::R600_txb:
673 TextureOp = 4;
674 break;
675 case AMDGPUIntrinsic::R600_txbc:
676 TextureOp = 5;
677 break;
678 case AMDGPUIntrinsic::R600_txf:
679 TextureOp = 6;
680 break;
681 case AMDGPUIntrinsic::R600_txq:
682 TextureOp = 7;
683 break;
684 case AMDGPUIntrinsic::R600_ddx:
685 TextureOp = 8;
686 break;
687 case AMDGPUIntrinsic::R600_ddy:
688 TextureOp = 9;
689 break;
Vincent Lejeune6df39432013-10-02 16:00:33 +0000690 case AMDGPUIntrinsic::R600_ldptr:
691 TextureOp = 10;
692 break;
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000693 default:
694 llvm_unreachable("Unknow Texture Operation");
695 }
696
697 SDValue TexArgs[19] = {
698 DAG.getConstant(TextureOp, MVT::i32),
699 Op.getOperand(1),
700 DAG.getConstant(0, MVT::i32),
701 DAG.getConstant(1, MVT::i32),
702 DAG.getConstant(2, MVT::i32),
703 DAG.getConstant(3, MVT::i32),
704 Op.getOperand(2),
705 Op.getOperand(3),
706 Op.getOperand(4),
707 DAG.getConstant(0, MVT::i32),
708 DAG.getConstant(1, MVT::i32),
709 DAG.getConstant(2, MVT::i32),
710 DAG.getConstant(3, MVT::i32),
711 Op.getOperand(5),
712 Op.getOperand(6),
713 Op.getOperand(7),
714 Op.getOperand(8),
715 Op.getOperand(9),
716 Op.getOperand(10)
717 };
Craig Topper48d114b2014-04-26 18:35:24 +0000718 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000719 }
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000720 case AMDGPUIntrinsic::AMDGPU_dp4: {
721 SDValue Args[8] = {
722 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
723 DAG.getConstant(0, MVT::i32)),
724 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
725 DAG.getConstant(0, MVT::i32)),
726 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
727 DAG.getConstant(1, MVT::i32)),
728 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
729 DAG.getConstant(1, MVT::i32)),
730 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
731 DAG.getConstant(2, MVT::i32)),
732 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
733 DAG.getConstant(2, MVT::i32)),
734 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
735 DAG.getConstant(3, MVT::i32)),
736 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
737 DAG.getConstant(3, MVT::i32))
738 };
Craig Topper48d114b2014-04-26 18:35:24 +0000739 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000740 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000741
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000742 case Intrinsic::r600_read_ngroups_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000743 return LowerImplicitParameter(DAG, VT, DL, 0);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000744 case Intrinsic::r600_read_ngroups_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000745 return LowerImplicitParameter(DAG, VT, DL, 1);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000746 case Intrinsic::r600_read_ngroups_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000747 return LowerImplicitParameter(DAG, VT, DL, 2);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000748 case Intrinsic::r600_read_global_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000749 return LowerImplicitParameter(DAG, VT, DL, 3);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000750 case Intrinsic::r600_read_global_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000751 return LowerImplicitParameter(DAG, VT, DL, 4);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000752 case Intrinsic::r600_read_global_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000753 return LowerImplicitParameter(DAG, VT, DL, 5);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000754 case Intrinsic::r600_read_local_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000755 return LowerImplicitParameter(DAG, VT, DL, 6);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000756 case Intrinsic::r600_read_local_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000757 return LowerImplicitParameter(DAG, VT, DL, 7);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000758 case Intrinsic::r600_read_local_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000759 return LowerImplicitParameter(DAG, VT, DL, 8);
760
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000761 case Intrinsic::r600_read_tgid_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000762 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
763 AMDGPU::T1_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000764 case Intrinsic::r600_read_tgid_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000765 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
766 AMDGPU::T1_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000767 case Intrinsic::r600_read_tgid_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000768 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
769 AMDGPU::T1_Z, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000770 case Intrinsic::r600_read_tidig_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000771 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
772 AMDGPU::T0_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000773 case Intrinsic::r600_read_tidig_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000774 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
775 AMDGPU::T0_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000776 case Intrinsic::r600_read_tidig_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000777 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
778 AMDGPU::T0_Z, VT);
779 }
780 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
781 break;
782 }
783 } // end switch(Op.getOpcode())
784 return SDValue();
785}
786
787void R600TargetLowering::ReplaceNodeResults(SDNode *N,
788 SmallVectorImpl<SDValue> &Results,
789 SelectionDAG &DAG) const {
790 switch (N->getOpcode()) {
Matt Arsenaultd125d742014-03-27 17:23:24 +0000791 default:
792 AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
793 return;
Tom Stellard75aadc22012-12-11 21:25:42 +0000794 case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
Tom Stellard365366f2013-01-23 02:09:06 +0000795 return;
796 case ISD::LOAD: {
797 SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
798 Results.push_back(SDValue(Node, 0));
799 Results.push_back(SDValue(Node, 1));
800 // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
801 // function
802 DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
803 return;
804 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000805 case ISD::STORE:
806 SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
807 Results.push_back(SDValue(Node, 0));
808 return;
Tom Stellard75aadc22012-12-11 21:25:42 +0000809 }
810}
811
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000812SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
813 // On hw >= R700, COS/SIN input must be between -1. and 1.
814 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
815 EVT VT = Op.getValueType();
816 SDValue Arg = Op.getOperand(0);
817 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
818 DAG.getNode(ISD::FADD, SDLoc(Op), VT,
819 DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
820 DAG.getConstantFP(0.15915494309, MVT::f32)),
821 DAG.getConstantFP(0.5, MVT::f32)));
822 unsigned TrigNode;
823 switch (Op.getOpcode()) {
824 case ISD::FCOS:
825 TrigNode = AMDGPUISD::COS_HW;
826 break;
827 case ISD::FSIN:
828 TrigNode = AMDGPUISD::SIN_HW;
829 break;
830 default:
831 llvm_unreachable("Wrong trig opcode");
832 }
833 SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
834 DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
835 DAG.getConstantFP(-0.5, MVT::f32)));
836 if (Gen >= AMDGPUSubtarget::R700)
837 return TrigVal;
838 // On R600 hw, COS/SIN input must be between -Pi and Pi.
839 return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
840 DAG.getConstantFP(3.14159265359, MVT::f32));
841}
842
Tom Stellard75aadc22012-12-11 21:25:42 +0000843SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
844 return DAG.getNode(
845 ISD::SETCC,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000846 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +0000847 MVT::i1,
848 Op, DAG.getConstantFP(0.0f, MVT::f32),
849 DAG.getCondCode(ISD::SETNE)
850 );
851}
852
Tom Stellard75aadc22012-12-11 21:25:42 +0000853SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000854 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +0000855 unsigned DwordOffset) const {
856 unsigned ByteOffset = DwordOffset * 4;
857 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +0000858 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +0000859
860 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
861 assert(isInt<16>(ByteOffset));
862
863 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
864 DAG.getConstant(ByteOffset, MVT::i32), // PTR
865 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
866 false, false, false, 0);
867}
868
Tom Stellard75aadc22012-12-11 21:25:42 +0000869bool R600TargetLowering::isZero(SDValue Op) const {
870 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
871 return Cst->isNullValue();
872 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
873 return CstFP->isZero();
874 } else {
875 return false;
876 }
877}
878
// Custom lowering for ISD::SELECT_CC. Tries, in order:
//  1. a SET* instruction (select between hardware true/false constants),
//  2. a CND* instruction (compare against zero),
//  3. a min/max pattern,
// and otherwise decomposes the node into two SELECT_CC operations that the
// above can handle. Operands/condition codes are swapped or inverted along
// the way to put the node into a form the hardware patterns match.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  // If True/False are reversed (hw-false in the True slot), invert the
  // condition code — or invert and swap the compare operands — so the node
  // takes the form SET* expects.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
     ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    // Re-read the condition code: CC may have been replaced above.
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        // Inverting the condition requires swapping True/False as well.
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types.  This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    // The hardware has no "not equal" compare; fold SETNE-style codes into
    // the equivalent equality compare with True/False exchanged.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }


  // Possible Min/Max pattern
  SDValue MinMax = LowerMinMax(Op, DAG);
  if (MinMax.getNode()) {
    return MinMax;
  }

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
1020
Alp Tokercb402912014-01-24 17:20:08 +00001021/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001022/// convert these pointers to a register index. Each register holds
1023/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
1024/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
1025/// for indirect addressing.
1026SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1027 unsigned StackWidth,
1028 SelectionDAG &DAG) const {
1029 unsigned SRLPad;
1030 switch(StackWidth) {
1031 case 1:
1032 SRLPad = 2;
1033 break;
1034 case 2:
1035 SRLPad = 3;
1036 break;
1037 case 4:
1038 SRLPad = 4;
1039 break;
1040 default: llvm_unreachable("Invalid stack width");
1041 }
1042
Andrew Trickef9de2a2013-05-25 02:42:55 +00001043 return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001044 DAG.getConstant(SRLPad, MVT::i32));
1045}
1046
// Compute, for element \p ElemIdx of a value spread across the stack, the
// sub-register channel to use and the pointer increment to apply.
//
// NOTE: \p PtrIncr is a *delta*, not an absolute offset. Both callers in this
// file (LowerSTORE / LowerLOAD) walk the elements in order 0..N-1 and add
// PtrIncr to the running pointer on every iteration, so the increment only
// needs to be 1 at the element where a new register row begins.
void R600TargetLowering::getStackAddress(unsigned StackWidth,
                                         unsigned ElemIdx,
                                         unsigned &Channel,
                                         unsigned &PtrIncr) const {
  switch (StackWidth) {
  default:
  case 1:
    // One channel per register: every element after the first starts a new
    // register, so each step increments the pointer.
    Channel = 0;
    if (ElemIdx > 0) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 2:
    // Two channels per register: elements 0,1 share the first register;
    // the pointer advances once, when crossing into element 2.
    Channel = ElemIdx % 2;
    if (ElemIdx == 2) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 4:
    // Four channels per register: all elements fit in one register.
    Channel = ElemIdx;
    PtrIncr = 0;
    break;
  }
}
1075
Tom Stellard75aadc22012-12-11 21:25:42 +00001076SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001077 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001078 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1079 SDValue Chain = Op.getOperand(0);
1080 SDValue Value = Op.getOperand(1);
1081 SDValue Ptr = Op.getOperand(2);
1082
Tom Stellard2ffc3302013-08-26 15:05:44 +00001083 SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
Tom Stellardfbab8272013-08-16 01:12:11 +00001084 if (Result.getNode()) {
1085 return Result;
1086 }
1087
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001088 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
1089 if (StoreNode->isTruncatingStore()) {
1090 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001091 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001092 EVT MemVT = StoreNode->getMemoryVT();
1093 SDValue MaskConstant;
1094 if (MemVT == MVT::i8) {
1095 MaskConstant = DAG.getConstant(0xFF, MVT::i32);
1096 } else {
1097 assert(MemVT == MVT::i16);
1098 MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
1099 }
1100 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
1101 DAG.getConstant(2, MVT::i32));
1102 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
1103 DAG.getConstant(0x00000003, VT));
1104 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1105 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
1106 DAG.getConstant(3, VT));
1107 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1108 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
1109 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1110 // vector instead.
1111 SDValue Src[4] = {
1112 ShiftedValue,
1113 DAG.getConstant(0, MVT::i32),
1114 DAG.getConstant(0, MVT::i32),
1115 Mask
1116 };
Craig Topper48d114b2014-04-26 18:35:24 +00001117 SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001118 SDValue Args[3] = { Chain, Input, DWordAddr };
1119 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
Craig Topper206fcd42014-04-26 19:29:41 +00001120 Op->getVTList(), Args, MemVT,
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001121 StoreNode->getMemOperand());
1122 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
1123 Value.getValueType().bitsGE(MVT::i32)) {
1124 // Convert pointer from byte address to dword address.
1125 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1126 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
1127 Ptr, DAG.getConstant(2, MVT::i32)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001128
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001129 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
Matt Arsenaulteaa3a7e2013-12-10 21:37:42 +00001130 llvm_unreachable("Truncated and indexed stores not supported yet");
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001131 } else {
1132 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1133 }
1134 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001135 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001136 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001137
1138 EVT ValueVT = Value.getValueType();
1139
1140 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1141 return SDValue();
1142 }
1143
Tom Stellarde9373602014-01-22 19:24:14 +00001144 SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
1145 if (Ret.getNode()) {
1146 return Ret;
1147 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001148 // Lowering for indirect addressing
1149
1150 const MachineFunction &MF = DAG.getMachineFunction();
1151 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1152 getTargetMachine().getFrameLowering());
1153 unsigned StackWidth = TFL->getStackWidth(MF);
1154
1155 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1156
1157 if (ValueVT.isVector()) {
1158 unsigned NumElemVT = ValueVT.getVectorNumElements();
1159 EVT ElemVT = ValueVT.getVectorElementType();
Craig Topper48d114b2014-04-26 18:35:24 +00001160 SmallVector<SDValue, 4> Stores(NumElemVT);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001161
1162 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1163 "vector width in load");
1164
1165 for (unsigned i = 0; i < NumElemVT; ++i) {
1166 unsigned Channel, PtrIncr;
1167 getStackAddress(StackWidth, i, Channel, PtrIncr);
1168 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1169 DAG.getConstant(PtrIncr, MVT::i32));
1170 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
1171 Value, DAG.getConstant(i, MVT::i32));
1172
1173 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1174 Chain, Elem, Ptr,
1175 DAG.getTargetConstant(Channel, MVT::i32));
1176 }
Craig Topper48d114b2014-04-26 18:35:24 +00001177 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001178 } else {
1179 if (ValueVT == MVT::i8) {
1180 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1181 }
1182 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
NAKAMURA Takumi18ca09c2013-05-22 06:37:25 +00001183 DAG.getTargetConstant(0, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001184 }
1185
1186 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001187}
1188
Tom Stellard365366f2013-01-23 02:09:06 +00001189// return (512 + (kc_bank << 12)
1190static int
1191ConstantAddressBlock(unsigned AddressSpace) {
1192 switch (AddressSpace) {
1193 case AMDGPUAS::CONSTANT_BUFFER_0:
1194 return 512;
1195 case AMDGPUAS::CONSTANT_BUFFER_1:
1196 return 512 + 4096;
1197 case AMDGPUAS::CONSTANT_BUFFER_2:
1198 return 512 + 4096 * 2;
1199 case AMDGPUAS::CONSTANT_BUFFER_3:
1200 return 512 + 4096 * 3;
1201 case AMDGPUAS::CONSTANT_BUFFER_4:
1202 return 512 + 4096 * 4;
1203 case AMDGPUAS::CONSTANT_BUFFER_5:
1204 return 512 + 4096 * 5;
1205 case AMDGPUAS::CONSTANT_BUFFER_6:
1206 return 512 + 4096 * 6;
1207 case AMDGPUAS::CONSTANT_BUFFER_7:
1208 return 512 + 4096 * 7;
1209 case AMDGPUAS::CONSTANT_BUFFER_8:
1210 return 512 + 4096 * 8;
1211 case AMDGPUAS::CONSTANT_BUFFER_9:
1212 return 512 + 4096 * 9;
1213 case AMDGPUAS::CONSTANT_BUFFER_10:
1214 return 512 + 4096 * 10;
1215 case AMDGPUAS::CONSTANT_BUFFER_11:
1216 return 512 + 4096 * 11;
1217 case AMDGPUAS::CONSTANT_BUFFER_12:
1218 return 512 + 4096 * 12;
1219 case AMDGPUAS::CONSTANT_BUFFER_13:
1220 return 512 + 4096 * 13;
1221 case AMDGPUAS::CONSTANT_BUFFER_14:
1222 return 512 + 4096 * 14;
1223 case AMDGPUAS::CONSTANT_BUFFER_15:
1224 return 512 + 4096 * 15;
1225 default:
1226 return -1;
1227 }
1228}
1229
// Custom lowering for ISD::LOAD.
//
// Tries, in order: the generic AMDGPU lowering; splitting vector loads from
// local memory; folding loads from a constant buffer into CONST_ADDRESS
// nodes; manually expanding SEXT loads (only legal from CONSTANT_BUFFER_0);
// and finally lowering private-address loads to REGISTER_LOAD nodes with
// indirect addressing. Everything else returns SDValue() for the legalizer.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  // Give the generic AMDGPU lowering the first chance to handle this load.
  SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
  if (Ret.getNode()) {
    SDValue Ops[2] = {
      Ret,
      Chain
    };
    return DAG.getMergeValues(Ops, DL);
  }


  // Vector loads from local memory are split into smaller loads.
  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      SplitVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    // A load from a constant buffer with a statically known address can be
    // folded directly into per-channel CONST_ADDRESS nodes.
    if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
        isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT,
                           makeArrayRef(Slots, NumElements));
    } else {
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    // Scalar loads only need channel 0 of the fetched vector.
    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
          DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    // Expand as: (zext load) << shift >> shift (arithmetic), which performs
    // the sign extension manually.
    SDValue ShiftAmount =
        DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                  LoadNode->getPointerInfo(), MemVT,
                                  LoadNode->isVolatile(),
                                  LoadNode->isNonTemporal(),
                                  LoadNode->getAlignment());
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);

    SDValue MergedValues[2] = { Sra, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    // One REGISTER_LOAD per element; getStackAddress yields the channel and
    // a cumulative pointer increment for each element in turn.
    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, MVT::i32),
                             Op.getOperand(2));
    }
    // Pad the result out to a full 4-element vector with undefs.
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2] = {
    LoweredLoad,
    Chain
  };

  return DAG.getMergeValues(Ops, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001378
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
//
// For non-compute shaders, arguments arrive in live-in 128-bit registers.
// For compute kernels, arguments are read from constant buffer 0, starting
// after the 36-byte implicit-parameter header.
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      SDLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned ShaderType = MF.getInfo<R600MachineFunctionInfo>()->ShaderType;

  SmallVector<ISD::InputArg, 8> LocalIns;

  // Recover the pre-legalization argument types so the loads below use the
  // in-memory type rather than the legalized register type.
  getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);

  AnalyzeFormalArguments(CCInfo, LocalIns);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    EVT VT = Ins[i].VT;          // legalized (register) type
    EVT MemVT = LocalIns[i].VT;  // original (in-memory) type

    if (ShaderType != ShaderType::COMPUTE) {
      // Graphics shaders: the argument is already in a live-in register.
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
    // being invalid. Somehow this seems to work with i64 arguments, but breaks
    // for <1 x i64>.

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.

    // FIXME: This should really check the extload type, but the handling of
    // extload vecto parameters seems to be broken.
    //ISD::LoadExtType Ext = Ins[i].Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
    ISD::LoadExtType Ext = ISD::SEXTLOAD;
    SDValue Arg = DAG.getExtLoad(Ext, DL, VT, Chain,
                                 DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
                                 MachinePointerInfo(UndefValue::get(PtrTy)),
                                 MemVT, false, false, 4);

    // 4 is the preferred alignment for the CONSTANT memory space.
    InVals.push_back(Arg);
  }
  return Chain;
}
1438
Matt Arsenault758659232013-05-18 00:21:46 +00001439EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001440 if (!VT.isVector())
1441 return MVT::i32;
Tom Stellard75aadc22012-12-11 21:25:42 +00001442 return VT.changeVectorElementTypeToInteger();
1443}
1444
// Rewrite a BUILD_VECTOR so that elements expressible as swizzle selectors
// become undef operands, recording the selector for each rewritten element
// in \p RemapSwizzle: 7 = SEL_MASK_WRITE (undef element), 4 = SEL_0
// (constant 0.0), 5 = SEL_1 (constant 1.0), or the index of an earlier
// identical element (duplicates reuse its channel).
static SDValue CompactSwizzlableVector(
  SelectionDAG &DAG, SDValue VectorEntry,
  DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      // We mask write here to teach later passes that the ith element of this
      // vector is undef. Thus we can use it to reduce 128 bits reg usage,
      // break false dependencies and additionally make assembly easier to read.
      RemapSwizzle[i] = 7; // SEL_MASK_WRITE
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    // Elements that are (or became) undef take no part in duplicate folding.
    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      continue;
    // Fold a duplicate of an earlier element into a reference to its channel.
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec);
}
1487
Benjamin Kramer193960c2013-06-11 13:32:25 +00001488static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1489 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001490 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1491 assert(RemapSwizzle.empty());
1492 SDValue NewBldVec[4] = {
1493 VectorEntry.getOperand(0),
1494 VectorEntry.getOperand(1),
1495 VectorEntry.getOperand(2),
1496 VectorEntry.getOperand(3)
1497 };
1498 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001499 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001500 RemapSwizzle[i] = i;
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001501 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1502 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1503 ->getZExtValue();
1504 if (i == Idx)
1505 isUnmovable[Idx] = true;
1506 }
1507 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001508
1509 for (unsigned i = 0; i < 4; i++) {
1510 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1511 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1512 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001513 if (isUnmovable[Idx])
1514 continue;
1515 // Swap i and Idx
1516 std::swap(NewBldVec[Idx], NewBldVec[i]);
1517 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1518 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001519 }
1520 }
1521
1522 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001523 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001524}
1525
1526
1527SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1528SDValue Swz[4], SelectionDAG &DAG) const {
1529 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1530 // Old -> New swizzle values
1531 DenseMap<unsigned, unsigned> SwizzleRemap;
1532
1533 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1534 for (unsigned i = 0; i < 4; i++) {
1535 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1536 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1537 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1538 }
1539
1540 SwizzleRemap.clear();
1541 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1542 for (unsigned i = 0; i < 4; i++) {
1543 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1544 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1545 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1546 }
1547
1548 return BuildVector;
1549}
1550
1551
Tom Stellard75aadc22012-12-11 21:25:42 +00001552//===----------------------------------------------------------------------===//
1553// Custom DAG Optimizations
1554//===----------------------------------------------------------------------===//
1555
1556SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1557 DAGCombinerInfo &DCI) const {
1558 SelectionDAG &DAG = DCI.DAG;
1559
1560 switch (N->getOpcode()) {
Tom Stellard50122a52014-04-07 19:45:41 +00001561 default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00001562 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1563 case ISD::FP_ROUND: {
1564 SDValue Arg = N->getOperand(0);
1565 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001566 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001567 Arg.getOperand(0));
1568 }
1569 break;
1570 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001571
1572 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1573 // (i32 select_cc f32, f32, -1, 0 cc)
1574 //
1575 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1576 // this to one of the SET*_DX10 instructions.
1577 case ISD::FP_TO_SINT: {
1578 SDValue FNeg = N->getOperand(0);
1579 if (FNeg.getOpcode() != ISD::FNEG) {
1580 return SDValue();
1581 }
1582 SDValue SelectCC = FNeg.getOperand(0);
1583 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1584 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1585 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1586 !isHWTrueValue(SelectCC.getOperand(2)) ||
1587 !isHWFalseValue(SelectCC.getOperand(3))) {
1588 return SDValue();
1589 }
1590
Andrew Trickef9de2a2013-05-25 02:42:55 +00001591 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001592 SelectCC.getOperand(0), // LHS
1593 SelectCC.getOperand(1), // RHS
1594 DAG.getConstant(-1, MVT::i32), // True
1595 DAG.getConstant(0, MVT::i32), // Flase
1596 SelectCC.getOperand(4)); // CC
1597
1598 break;
1599 }
Quentin Colombete2e05482013-07-30 00:27:16 +00001600
NAKAMURA Takumi8a046432013-10-28 04:07:38 +00001601 // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1602 // => build_vector elt0, ... , NewEltIdx, ... , eltN
Quentin Colombete2e05482013-07-30 00:27:16 +00001603 case ISD::INSERT_VECTOR_ELT: {
1604 SDValue InVec = N->getOperand(0);
1605 SDValue InVal = N->getOperand(1);
1606 SDValue EltNo = N->getOperand(2);
1607 SDLoc dl(N);
1608
1609 // If the inserted element is an UNDEF, just use the input vector.
1610 if (InVal.getOpcode() == ISD::UNDEF)
1611 return InVec;
1612
1613 EVT VT = InVec.getValueType();
1614
1615 // If we can't generate a legal BUILD_VECTOR, exit
1616 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1617 return SDValue();
1618
1619 // Check that we know which element is being inserted
1620 if (!isa<ConstantSDNode>(EltNo))
1621 return SDValue();
1622 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1623
1624 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1625 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1626 // vector elements.
1627 SmallVector<SDValue, 8> Ops;
1628 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1629 Ops.append(InVec.getNode()->op_begin(),
1630 InVec.getNode()->op_end());
1631 } else if (InVec.getOpcode() == ISD::UNDEF) {
1632 unsigned NElts = VT.getVectorNumElements();
1633 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1634 } else {
1635 return SDValue();
1636 }
1637
1638 // Insert the element
1639 if (Elt < Ops.size()) {
1640 // All the operands of BUILD_VECTOR must have the same type;
1641 // we enforce that here.
1642 EVT OpVT = Ops[0].getValueType();
1643 if (InVal.getValueType() != OpVT)
1644 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1645 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
1646 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
1647 Ops[Elt] = InVal;
1648 }
1649
1650 // Return the new vector
Craig Topper48d114b2014-04-26 18:35:24 +00001651 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
Quentin Colombete2e05482013-07-30 00:27:16 +00001652 }
1653
Tom Stellard365366f2013-01-23 02:09:06 +00001654 // Extract_vec (Build_vector) generated by custom lowering
1655 // also needs to be customly combined
1656 case ISD::EXTRACT_VECTOR_ELT: {
1657 SDValue Arg = N->getOperand(0);
1658 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1659 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1660 unsigned Element = Const->getZExtValue();
1661 return Arg->getOperand(Element);
1662 }
1663 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001664 if (Arg.getOpcode() == ISD::BITCAST &&
1665 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1666 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1667 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001668 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00001669 Arg->getOperand(0).getOperand(Element));
1670 }
1671 }
Tom Stellard365366f2013-01-23 02:09:06 +00001672 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001673
1674 case ISD::SELECT_CC: {
1675 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1676 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001677 //
1678 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1679 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001680 SDValue LHS = N->getOperand(0);
1681 if (LHS.getOpcode() != ISD::SELECT_CC) {
1682 return SDValue();
1683 }
1684
1685 SDValue RHS = N->getOperand(1);
1686 SDValue True = N->getOperand(2);
1687 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001688 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001689
1690 if (LHS.getOperand(2).getNode() != True.getNode() ||
1691 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00001692 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00001693 return SDValue();
1694 }
1695
Tom Stellard5e524892013-03-08 15:37:11 +00001696 switch (NCC) {
1697 default: return SDValue();
1698 case ISD::SETNE: return LHS;
1699 case ISD::SETEQ: {
1700 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1701 LHSCC = ISD::getSetCCInverse(LHSCC,
1702 LHS.getOperand(0).getValueType().isInteger());
Tom Stellardcd428182013-09-28 02:50:38 +00001703 if (DCI.isBeforeLegalizeOps() ||
1704 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1705 return DAG.getSelectCC(SDLoc(N),
1706 LHS.getOperand(0),
1707 LHS.getOperand(1),
1708 LHS.getOperand(2),
1709 LHS.getOperand(3),
1710 LHSCC);
1711 break;
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001712 }
Tom Stellard5e524892013-03-08 15:37:11 +00001713 }
Tom Stellardcd428182013-09-28 02:50:38 +00001714 return SDValue();
Tom Stellard5e524892013-03-08 15:37:11 +00001715 }
Tom Stellardfbab8272013-08-16 01:12:11 +00001716
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001717 case AMDGPUISD::EXPORT: {
1718 SDValue Arg = N->getOperand(1);
1719 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1720 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001721
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001722 SDValue NewArgs[8] = {
1723 N->getOperand(0), // Chain
1724 SDValue(),
1725 N->getOperand(2), // ArrayBase
1726 N->getOperand(3), // Type
1727 N->getOperand(4), // SWZ_X
1728 N->getOperand(5), // SWZ_Y
1729 N->getOperand(6), // SWZ_Z
1730 N->getOperand(7) // SWZ_W
1731 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00001732 SDLoc DL(N);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001733 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
Craig Topper48d114b2014-04-26 18:35:24 +00001734 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
Tom Stellarde06163a2013-02-07 14:02:35 +00001735 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001736 case AMDGPUISD::TEXTURE_FETCH: {
1737 SDValue Arg = N->getOperand(1);
1738 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1739 break;
1740
1741 SDValue NewArgs[19] = {
1742 N->getOperand(0),
1743 N->getOperand(1),
1744 N->getOperand(2),
1745 N->getOperand(3),
1746 N->getOperand(4),
1747 N->getOperand(5),
1748 N->getOperand(6),
1749 N->getOperand(7),
1750 N->getOperand(8),
1751 N->getOperand(9),
1752 N->getOperand(10),
1753 N->getOperand(11),
1754 N->getOperand(12),
1755 N->getOperand(13),
1756 N->getOperand(14),
1757 N->getOperand(15),
1758 N->getOperand(16),
1759 N->getOperand(17),
1760 N->getOperand(18),
1761 };
1762 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
1763 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
Craig Topper48d114b2014-04-26 18:35:24 +00001764 NewArgs);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001765 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001766 }
1767 return SDValue();
1768}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001769
1770static bool
1771FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001772 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001773 const R600InstrInfo *TII =
1774 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
1775 if (!Src.isMachineOpcode())
1776 return false;
1777 switch (Src.getMachineOpcode()) {
1778 case AMDGPU::FNEG_R600:
1779 if (!Neg.getNode())
1780 return false;
1781 Src = Src.getOperand(0);
1782 Neg = DAG.getTargetConstant(1, MVT::i32);
1783 return true;
1784 case AMDGPU::FABS_R600:
1785 if (!Abs.getNode())
1786 return false;
1787 Src = Src.getOperand(0);
1788 Abs = DAG.getTargetConstant(1, MVT::i32);
1789 return true;
1790 case AMDGPU::CONST_COPY: {
1791 unsigned Opcode = ParentNode->getMachineOpcode();
1792 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1793
1794 if (!Sel.getNode())
1795 return false;
1796
1797 SDValue CstOffset = Src.getOperand(0);
1798 if (ParentNode->getValueType(0).isVector())
1799 return false;
1800
1801 // Gather constants values
1802 int SrcIndices[] = {
1803 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
1804 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
1805 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
1806 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1807 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1808 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1809 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
1810 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
1811 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
1812 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
1813 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
1814 };
1815 std::vector<unsigned> Consts;
1816 for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
1817 int OtherSrcIdx = SrcIndices[i];
1818 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
1819 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
1820 continue;
1821 if (HasDst) {
1822 OtherSrcIdx--;
1823 OtherSelIdx--;
1824 }
1825 if (RegisterSDNode *Reg =
1826 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
1827 if (Reg->getReg() == AMDGPU::ALU_CONST) {
1828 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(
1829 ParentNode->getOperand(OtherSelIdx));
1830 Consts.push_back(Cst->getZExtValue());
1831 }
1832 }
1833 }
1834
1835 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
1836 Consts.push_back(Cst->getZExtValue());
1837 if (!TII->fitsConstReadLimitations(Consts)) {
1838 return false;
1839 }
1840
1841 Sel = CstOffset;
1842 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
1843 return true;
1844 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001845 case AMDGPU::MOV_IMM_I32:
1846 case AMDGPU::MOV_IMM_F32: {
1847 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
1848 uint64_t ImmValue = 0;
1849
1850
1851 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
1852 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
1853 float FloatValue = FPC->getValueAPF().convertToFloat();
1854 if (FloatValue == 0.0) {
1855 ImmReg = AMDGPU::ZERO;
1856 } else if (FloatValue == 0.5) {
1857 ImmReg = AMDGPU::HALF;
1858 } else if (FloatValue == 1.0) {
1859 ImmReg = AMDGPU::ONE;
1860 } else {
1861 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
1862 }
1863 } else {
1864 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
1865 uint64_t Value = C->getZExtValue();
1866 if (Value == 0) {
1867 ImmReg = AMDGPU::ZERO;
1868 } else if (Value == 1) {
1869 ImmReg = AMDGPU::ONE_INT;
1870 } else {
1871 ImmValue = Value;
1872 }
1873 }
1874
1875 // Check that we aren't already using an immediate.
1876 // XXX: It's possible for an instruction to have more than one
1877 // immediate operand, but this is not supported yet.
1878 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
1879 if (!Imm.getNode())
1880 return false;
1881 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
1882 assert(C);
1883 if (C->getZExtValue())
1884 return false;
1885 Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
1886 }
1887 Src = DAG.getRegister(ImmReg, MVT::i32);
1888 return true;
1889 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001890 default:
1891 return false;
1892 }
1893}
1894
1895
1896/// \brief Fold the instructions after selecting them
1897SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
1898 SelectionDAG &DAG) const {
1899 const R600InstrInfo *TII =
1900 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
1901 if (!Node->isMachineOpcode())
1902 return Node;
1903 unsigned Opcode = Node->getMachineOpcode();
1904 SDValue FakeOp;
1905
1906 std::vector<SDValue> Ops;
1907 for(SDNode::op_iterator I = Node->op_begin(), E = Node->op_end();
1908 I != E; ++I)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00001909 Ops.push_back(*I);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001910
1911 if (Opcode == AMDGPU::DOT_4) {
1912 int OperandIdx[] = {
1913 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1914 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1915 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1916 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
1917 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
1918 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
1919 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
1920 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00001921 };
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001922 int NegIdx[] = {
1923 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
1924 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
1925 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
1926 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
1927 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
1928 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
1929 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
1930 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
1931 };
1932 int AbsIdx[] = {
1933 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
1934 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
1935 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
1936 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
1937 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
1938 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
1939 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
1940 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
1941 };
1942 for (unsigned i = 0; i < 8; i++) {
1943 if (OperandIdx[i] < 0)
1944 return Node;
1945 SDValue &Src = Ops[OperandIdx[i] - 1];
1946 SDValue &Neg = Ops[NegIdx[i] - 1];
1947 SDValue &Abs = Ops[AbsIdx[i] - 1];
1948 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1949 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
1950 if (HasDst)
1951 SelIdx--;
1952 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001953 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
1954 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1955 }
1956 } else if (Opcode == AMDGPU::REG_SEQUENCE) {
1957 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
1958 SDValue &Src = Ops[i];
1959 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001960 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1961 }
Vincent Lejeune0167a312013-09-12 23:45:00 +00001962 } else if (Opcode == AMDGPU::CLAMP_R600) {
1963 SDValue Src = Node->getOperand(0);
1964 if (!Src.isMachineOpcode() ||
1965 !TII->hasInstrModifiers(Src.getMachineOpcode()))
1966 return Node;
1967 int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
1968 AMDGPU::OpName::clamp);
1969 if (ClampIdx < 0)
1970 return Node;
1971 std::vector<SDValue> Ops;
1972 unsigned NumOp = Src.getNumOperands();
1973 for(unsigned i = 0; i < NumOp; ++i)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00001974 Ops.push_back(Src.getOperand(i));
Vincent Lejeune0167a312013-09-12 23:45:00 +00001975 Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
1976 return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
1977 Node->getVTList(), Ops);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001978 } else {
1979 if (!TII->hasInstrModifiers(Opcode))
1980 return Node;
1981 int OperandIdx[] = {
1982 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
1983 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
1984 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
1985 };
1986 int NegIdx[] = {
1987 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
1988 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
1989 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
1990 };
1991 int AbsIdx[] = {
1992 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
1993 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
1994 -1
1995 };
1996 for (unsigned i = 0; i < 3; i++) {
1997 if (OperandIdx[i] < 0)
1998 return Node;
1999 SDValue &Src = Ops[OperandIdx[i] - 1];
2000 SDValue &Neg = Ops[NegIdx[i] - 1];
2001 SDValue FakeAbs;
2002 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2003 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2004 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002005 int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
2006 if (HasDst) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002007 SelIdx--;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002008 ImmIdx--;
2009 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002010 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002011 SDValue &Imm = Ops[ImmIdx];
2012 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002013 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2014 }
2015 }
2016
2017 return Node;
2018}