blob: e3bcab02e3fb4a45ec3336e43b4efd60fc1ba0a8 [file] [log] [blame]
//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
16#include "R600Defines.h"
17#include "R600InstrInfo.h"
18#include "R600MachineFunctionInfo.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000019#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000020#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000021#include "llvm/CodeGen/MachineInstrBuilder.h"
22#include "llvm/CodeGen/MachineRegisterInfo.h"
23#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000024#include "llvm/IR/Argument.h"
25#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000026
27using namespace llvm;
28
29R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000030 AMDGPUTargetLowering(TM),
31 Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000032 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
33 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
34 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
35 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000036 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
37 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
38
Tom Stellard75aadc22012-12-11 21:25:42 +000039 computeRegisterProperties();
40
Tom Stellard0351ea22013-09-28 02:50:50 +000041 // Set condition code actions
42 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
43 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000044 setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000045 setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000046 setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
47 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000048 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
49 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
50 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
51 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000052 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
53 setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
54
55 setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
56 setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
57 setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
58 setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
59
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000060 setOperationAction(ISD::FCOS, MVT::f32, Custom);
61 setOperationAction(ISD::FSIN, MVT::f32, Custom);
62
Tom Stellard75aadc22012-12-11 21:25:42 +000063 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000064 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000065
Tom Stellard492ebea2013-03-08 15:37:07 +000066 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
67 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000068
69 setOperationAction(ISD::FSUB, MVT::f32, Expand);
70
71 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
72 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
73 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000074
Tom Stellard75aadc22012-12-11 21:25:42 +000075 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
76 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
77
Tom Stellarde8f9f282013-03-08 15:37:05 +000078 setOperationAction(ISD::SETCC, MVT::i32, Expand);
79 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000080 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
81
Tom Stellard53f2f902013-09-05 18:38:03 +000082 setOperationAction(ISD::SELECT, MVT::i32, Expand);
83 setOperationAction(ISD::SELECT, MVT::f32, Expand);
84 setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
Tom Stellard53f2f902013-09-05 18:38:03 +000085 setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000086
Matt Arsenault4e466652014-04-16 01:41:30 +000087 // Expand sign extension of vectors
88 if (!Subtarget->hasBFE())
89 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
90
91 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
92 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);
93
94 if (!Subtarget->hasBFE())
95 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
96 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
97 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);
98
99 if (!Subtarget->hasBFE())
100 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
101 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
102 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);
103
104 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
105 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
106 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);
107
108 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
109
110
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000111 // Legalize loads and stores to the private address space.
112 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000113 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000114 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000115
116 // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
117 // spaces, so it is custom lowered to handle those where it isn't.
Tom Stellard1e803092013-07-23 01:48:18 +0000118 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
119 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
120 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
121 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000122 setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
123 setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
124
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000125 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000126 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000127 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000128 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
Tom Stellardd3ee8c12013-08-16 01:12:06 +0000129 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
130 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000131
Tom Stellard365366f2013-01-23 02:09:06 +0000132 setOperationAction(ISD::LOAD, MVT::i32, Custom);
133 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000134 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
135
Tom Stellard75aadc22012-12-11 21:25:42 +0000136 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +0000137 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +0000138 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +0000139 setTargetDAGCombine(ISD::SELECT_CC);
Quentin Colombete2e05482013-07-30 00:27:16 +0000140 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000141
Tom Stellard5f337882014-04-29 23:12:43 +0000142 // These should be replaced by UDVIREM, but it does not happen automatically
143 // during Type Legalization
144 setOperationAction(ISD::UDIV, MVT::i64, Custom);
145 setOperationAction(ISD::UREM, MVT::i64, Custom);
146
Michel Danzer49812b52013-07-10 16:37:07 +0000147 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
148
Tom Stellardb852af52013-03-08 15:37:03 +0000149 setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard87047f62013-04-24 23:56:18 +0000150 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
Tom Stellardfc455472013-08-12 22:33:21 +0000151 setSchedulingPreference(Sched::Source);
Tom Stellard75aadc22012-12-11 21:25:42 +0000152}
153
/// Lower pseudo instructions that were selected but need custom MachineInstr
/// expansion: modifier-flag moves, immediate moves, derivative texture
/// fetches, branches, exports and RAT writes.  Returns the block in which
/// emission should continue (always \p BB here).
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
          static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // Replace LDS_*_RET instruction that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI->getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // Result is live: keep the _RET form as-is (original MI is not erased
      // in this path).
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()))
        return BB;

      // Rebuild as the NORET form, copying every operand except the dead
      // destination (operand 0).
      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      // Anything else is handled by the generic AMDGPU inserter.
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  case AMDGPU::CLAMP_R600: {
    // CLAMP becomes a MOV carrying the clamp output-modifier flag.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                   AMDGPU::MOV,
                                                   MI->getOperand(0).getReg(),
                                                   MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    // FABS becomes a MOV carrying the absolute-value source modifier.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    // FNEG becomes a MOV carrying the negate source modifier.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    // Mark the defining instruction of the masked register with the mask
    // flag instead of emitting anything here.
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    // Materialize the FP constant via its raw bit pattern.
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    // Read from the constant file: a MOV from ALU_CONST with the constant
    // selector placed in src0_sel.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
        MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    // If the next instruction is the function return, fold the
    // end-of-program bit into this write.
    unsigned EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    // Derivative texture sample: first load the horizontal and vertical
    // gradients (T0/T1), then issue the gradient sample which consumes them
    // as implicit inputs.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4); // resource id
    MachineOperand &SID = MI->getOperand(5); // sampler id
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    // Per texture target: adjust the source swizzle and the coordinate-type
    // bits (shadow targets move the comparison value into W; rect/array
    // targets use unnormalized coordinates on some axes).
    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    // Same expansion as TXD above, but the final fetch is the shadow
    // (compare) gradient sample.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4); // resource id
    MachineOperand &SID = MI->getOperand(5); // sampler id
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    // Unconditional branch lowers to a plain JUMP.
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    // FP conditional branch: set the predicate bit from a float
    // compare-against-zero, then emit a predicated jump.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    // Same as above but using the integer not-zero compare.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO_INT)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if its not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    // Scan forward for a later export of the same type in this block.
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // NOTE(review): 84/40 appear to be the EXPORT_DONE vs EXPORT CF opcode
    // encodings for EG vs R600 — confirm against the ISA docs.
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB; // RETURN itself is kept, not erased.
  }
  }

  // The pseudo has been expanded; remove it.
  MI->eraseFromParent();
  return BB;
}
530
531//===----------------------------------------------------------------------===//
532// Custom DAG Lowering Operations
533//===----------------------------------------------------------------------===//
534
Tom Stellard75aadc22012-12-11 21:25:42 +0000535SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000536 MachineFunction &MF = DAG.getMachineFunction();
537 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard75aadc22012-12-11 21:25:42 +0000538 switch (Op.getOpcode()) {
539 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000540 case ISD::FCOS:
541 case ISD::FSIN: return LowerTrig(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000542 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000543 case ISD::STORE: return LowerSTORE(Op, DAG);
Tom Stellard365366f2013-01-23 02:09:06 +0000544 case ISD::LOAD: return LowerLOAD(Op, DAG);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000545 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000546 case ISD::INTRINSIC_VOID: {
547 SDValue Chain = Op.getOperand(0);
548 unsigned IntrinsicID =
549 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
550 switch (IntrinsicID) {
551 case AMDGPUIntrinsic::AMDGPU_store_output: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000552 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
553 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000554 MFI->LiveOuts.push_back(Reg);
Andrew Trickef9de2a2013-05-25 02:42:55 +0000555 return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
Tom Stellard75aadc22012-12-11 21:25:42 +0000556 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000557 case AMDGPUIntrinsic::R600_store_swizzle: {
558 const SDValue Args[8] = {
559 Chain,
560 Op.getOperand(2), // Export Value
561 Op.getOperand(3), // ArrayBase
562 Op.getOperand(4), // Type
563 DAG.getConstant(0, MVT::i32), // SWZ_X
564 DAG.getConstant(1, MVT::i32), // SWZ_Y
565 DAG.getConstant(2, MVT::i32), // SWZ_Z
566 DAG.getConstant(3, MVT::i32) // SWZ_W
567 };
Craig Topper48d114b2014-04-26 18:35:24 +0000568 return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(), Args);
Tom Stellard75aadc22012-12-11 21:25:42 +0000569 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000570
Tom Stellard75aadc22012-12-11 21:25:42 +0000571 // default for switch(IntrinsicID)
572 default: break;
573 }
574 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
575 break;
576 }
577 case ISD::INTRINSIC_WO_CHAIN: {
578 unsigned IntrinsicID =
579 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
580 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000581 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000582 switch(IntrinsicID) {
583 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000584 case AMDGPUIntrinsic::R600_load_input: {
585 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
586 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
587 MachineFunction &MF = DAG.getMachineFunction();
588 MachineRegisterInfo &MRI = MF.getRegInfo();
589 MRI.addLiveIn(Reg);
590 return DAG.getCopyFromReg(DAG.getEntryNode(),
591 SDLoc(DAG.getEntryNode()), Reg, VT);
592 }
593
594 case AMDGPUIntrinsic::R600_interp_input: {
595 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
596 int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
597 MachineSDNode *interp;
598 if (ijb < 0) {
599 const MachineFunction &MF = DAG.getMachineFunction();
600 const R600InstrInfo *TII =
601 static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
602 interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
603 MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
604 return DAG.getTargetExtractSubreg(
605 TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
606 DL, MVT::f32, SDValue(interp, 0));
607 }
608 MachineFunction &MF = DAG.getMachineFunction();
609 MachineRegisterInfo &MRI = MF.getRegInfo();
610 unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
611 unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
612 MRI.addLiveIn(RegisterI);
613 MRI.addLiveIn(RegisterJ);
614 SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
615 SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
616 SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
617 SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);
618
619 if (slot % 4 < 2)
620 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
621 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
622 RegisterJNode, RegisterINode);
623 else
624 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
625 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
626 RegisterJNode, RegisterINode);
627 return SDValue(interp, slot % 2);
628 }
Vincent Lejeunef143af32013-11-11 22:10:24 +0000629 case AMDGPUIntrinsic::R600_interp_xy:
630 case AMDGPUIntrinsic::R600_interp_zw: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000631 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Tom Stellard41afe6a2013-02-05 17:09:14 +0000632 MachineSDNode *interp;
Vincent Lejeunef143af32013-11-11 22:10:24 +0000633 SDValue RegisterINode = Op.getOperand(2);
634 SDValue RegisterJNode = Op.getOperand(3);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000635
Vincent Lejeunef143af32013-11-11 22:10:24 +0000636 if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
Tom Stellard41afe6a2013-02-05 17:09:14 +0000637 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
Vincent Lejeunef143af32013-11-11 22:10:24 +0000638 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000639 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000640 else
641 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
Vincent Lejeunef143af32013-11-11 22:10:24 +0000642 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000643 RegisterJNode, RegisterINode);
Vincent Lejeunef143af32013-11-11 22:10:24 +0000644 return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
645 SDValue(interp, 0), SDValue(interp, 1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000646 }
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000647 case AMDGPUIntrinsic::R600_tex:
648 case AMDGPUIntrinsic::R600_texc:
649 case AMDGPUIntrinsic::R600_txl:
650 case AMDGPUIntrinsic::R600_txlc:
651 case AMDGPUIntrinsic::R600_txb:
652 case AMDGPUIntrinsic::R600_txbc:
653 case AMDGPUIntrinsic::R600_txf:
654 case AMDGPUIntrinsic::R600_txq:
655 case AMDGPUIntrinsic::R600_ddx:
Vincent Lejeune6df39432013-10-02 16:00:33 +0000656 case AMDGPUIntrinsic::R600_ddy:
657 case AMDGPUIntrinsic::R600_ldptr: {
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000658 unsigned TextureOp;
659 switch (IntrinsicID) {
660 case AMDGPUIntrinsic::R600_tex:
661 TextureOp = 0;
662 break;
663 case AMDGPUIntrinsic::R600_texc:
664 TextureOp = 1;
665 break;
666 case AMDGPUIntrinsic::R600_txl:
667 TextureOp = 2;
668 break;
669 case AMDGPUIntrinsic::R600_txlc:
670 TextureOp = 3;
671 break;
672 case AMDGPUIntrinsic::R600_txb:
673 TextureOp = 4;
674 break;
675 case AMDGPUIntrinsic::R600_txbc:
676 TextureOp = 5;
677 break;
678 case AMDGPUIntrinsic::R600_txf:
679 TextureOp = 6;
680 break;
681 case AMDGPUIntrinsic::R600_txq:
682 TextureOp = 7;
683 break;
684 case AMDGPUIntrinsic::R600_ddx:
685 TextureOp = 8;
686 break;
687 case AMDGPUIntrinsic::R600_ddy:
688 TextureOp = 9;
689 break;
Vincent Lejeune6df39432013-10-02 16:00:33 +0000690 case AMDGPUIntrinsic::R600_ldptr:
691 TextureOp = 10;
692 break;
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000693 default:
694 llvm_unreachable("Unknow Texture Operation");
695 }
696
697 SDValue TexArgs[19] = {
698 DAG.getConstant(TextureOp, MVT::i32),
699 Op.getOperand(1),
700 DAG.getConstant(0, MVT::i32),
701 DAG.getConstant(1, MVT::i32),
702 DAG.getConstant(2, MVT::i32),
703 DAG.getConstant(3, MVT::i32),
704 Op.getOperand(2),
705 Op.getOperand(3),
706 Op.getOperand(4),
707 DAG.getConstant(0, MVT::i32),
708 DAG.getConstant(1, MVT::i32),
709 DAG.getConstant(2, MVT::i32),
710 DAG.getConstant(3, MVT::i32),
711 Op.getOperand(5),
712 Op.getOperand(6),
713 Op.getOperand(7),
714 Op.getOperand(8),
715 Op.getOperand(9),
716 Op.getOperand(10)
717 };
Craig Topper48d114b2014-04-26 18:35:24 +0000718 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000719 }
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000720 case AMDGPUIntrinsic::AMDGPU_dp4: {
721 SDValue Args[8] = {
722 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
723 DAG.getConstant(0, MVT::i32)),
724 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
725 DAG.getConstant(0, MVT::i32)),
726 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
727 DAG.getConstant(1, MVT::i32)),
728 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
729 DAG.getConstant(1, MVT::i32)),
730 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
731 DAG.getConstant(2, MVT::i32)),
732 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
733 DAG.getConstant(2, MVT::i32)),
734 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
735 DAG.getConstant(3, MVT::i32)),
736 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
737 DAG.getConstant(3, MVT::i32))
738 };
Craig Topper48d114b2014-04-26 18:35:24 +0000739 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000740 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000741
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000742 case Intrinsic::r600_read_ngroups_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000743 return LowerImplicitParameter(DAG, VT, DL, 0);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000744 case Intrinsic::r600_read_ngroups_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000745 return LowerImplicitParameter(DAG, VT, DL, 1);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000746 case Intrinsic::r600_read_ngroups_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000747 return LowerImplicitParameter(DAG, VT, DL, 2);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000748 case Intrinsic::r600_read_global_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000749 return LowerImplicitParameter(DAG, VT, DL, 3);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000750 case Intrinsic::r600_read_global_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000751 return LowerImplicitParameter(DAG, VT, DL, 4);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000752 case Intrinsic::r600_read_global_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000753 return LowerImplicitParameter(DAG, VT, DL, 5);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000754 case Intrinsic::r600_read_local_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000755 return LowerImplicitParameter(DAG, VT, DL, 6);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000756 case Intrinsic::r600_read_local_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000757 return LowerImplicitParameter(DAG, VT, DL, 7);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000758 case Intrinsic::r600_read_local_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000759 return LowerImplicitParameter(DAG, VT, DL, 8);
760
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000761 case Intrinsic::r600_read_tgid_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000762 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
763 AMDGPU::T1_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000764 case Intrinsic::r600_read_tgid_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000765 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
766 AMDGPU::T1_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000767 case Intrinsic::r600_read_tgid_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000768 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
769 AMDGPU::T1_Z, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000770 case Intrinsic::r600_read_tidig_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000771 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
772 AMDGPU::T0_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000773 case Intrinsic::r600_read_tidig_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000774 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
775 AMDGPU::T0_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000776 case Intrinsic::r600_read_tidig_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000777 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
778 AMDGPU::T0_Z, VT);
779 }
780 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
781 break;
782 }
783 } // end switch(Op.getOpcode())
784 return SDValue();
785}
786
787void R600TargetLowering::ReplaceNodeResults(SDNode *N,
788 SmallVectorImpl<SDValue> &Results,
789 SelectionDAG &DAG) const {
790 switch (N->getOpcode()) {
Matt Arsenaultd125d742014-03-27 17:23:24 +0000791 default:
792 AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
793 return;
Tom Stellard75aadc22012-12-11 21:25:42 +0000794 case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
Tom Stellard365366f2013-01-23 02:09:06 +0000795 return;
796 case ISD::LOAD: {
797 SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
798 Results.push_back(SDValue(Node, 0));
799 Results.push_back(SDValue(Node, 1));
800 // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
801 // function
802 DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
803 return;
804 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000805 case ISD::STORE:
806 SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
807 Results.push_back(SDValue(Node, 0));
808 return;
Tom Stellard75aadc22012-12-11 21:25:42 +0000809 }
810}
811
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000812SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
813 // On hw >= R700, COS/SIN input must be between -1. and 1.
814 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
815 EVT VT = Op.getValueType();
816 SDValue Arg = Op.getOperand(0);
817 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
818 DAG.getNode(ISD::FADD, SDLoc(Op), VT,
819 DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
820 DAG.getConstantFP(0.15915494309, MVT::f32)),
821 DAG.getConstantFP(0.5, MVT::f32)));
822 unsigned TrigNode;
823 switch (Op.getOpcode()) {
824 case ISD::FCOS:
825 TrigNode = AMDGPUISD::COS_HW;
826 break;
827 case ISD::FSIN:
828 TrigNode = AMDGPUISD::SIN_HW;
829 break;
830 default:
831 llvm_unreachable("Wrong trig opcode");
832 }
833 SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
834 DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
835 DAG.getConstantFP(-0.5, MVT::f32)));
836 if (Gen >= AMDGPUSubtarget::R700)
837 return TrigVal;
838 // On R600 hw, COS/SIN input must be between -Pi and Pi.
839 return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
840 DAG.getConstantFP(3.14159265359, MVT::f32));
841}
842
Tom Stellard75aadc22012-12-11 21:25:42 +0000843SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
844 return DAG.getNode(
845 ISD::SETCC,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000846 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +0000847 MVT::i1,
848 Op, DAG.getConstantFP(0.0f, MVT::f32),
849 DAG.getCondCode(ISD::SETNE)
850 );
851}
852
Tom Stellard75aadc22012-12-11 21:25:42 +0000853SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000854 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +0000855 unsigned DwordOffset) const {
856 unsigned ByteOffset = DwordOffset * 4;
857 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +0000858 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +0000859
860 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
861 assert(isInt<16>(ByteOffset));
862
863 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
864 DAG.getConstant(ByteOffset, MVT::i32), // PTR
865 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
866 false, false, false, 0);
867}
868
Tom Stellard75aadc22012-12-11 21:25:42 +0000869bool R600TargetLowering::isZero(SDValue Op) const {
870 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
871 return Cst->isNullValue();
872 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
873 return CstFP->isZero();
874 } else {
875 return false;
876 }
877}
878
/// Custom lowering for SELECT_CC.  First tries to canonicalize the node into
/// a form matchable by the hardware SET* or CND* instructions; if neither
/// fits, expands it into two SELECT_CC nodes that are each natively
/// supported.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type.
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.  If the hardware
  // true/false constants are swapped, invert the condition (or invert and
  // swap the compare operands) so they land in the SET* slots.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
     ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    // The hardware has no "not equal" CND* form, so rewrite NE compares
    // against zero as the inverted condition with True/False exchanged.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations: first materialize the condition as a hardware
  // boolean, then select on that boolean being non-zero.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
1013
Alp Tokercb402912014-01-24 17:20:08 +00001014/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001015/// convert these pointers to a register index. Each register holds
1016/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
1017/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
1018/// for indirect addressing.
1019SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1020 unsigned StackWidth,
1021 SelectionDAG &DAG) const {
1022 unsigned SRLPad;
1023 switch(StackWidth) {
1024 case 1:
1025 SRLPad = 2;
1026 break;
1027 case 2:
1028 SRLPad = 3;
1029 break;
1030 case 4:
1031 SRLPad = 4;
1032 break;
1033 default: llvm_unreachable("Invalid stack width");
1034 }
1035
Andrew Trickef9de2a2013-05-25 02:42:55 +00001036 return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001037 DAG.getConstant(SRLPad, MVT::i32));
1038}
1039
1040void R600TargetLowering::getStackAddress(unsigned StackWidth,
1041 unsigned ElemIdx,
1042 unsigned &Channel,
1043 unsigned &PtrIncr) const {
1044 switch (StackWidth) {
1045 default:
1046 case 1:
1047 Channel = 0;
1048 if (ElemIdx > 0) {
1049 PtrIncr = 1;
1050 } else {
1051 PtrIncr = 0;
1052 }
1053 break;
1054 case 2:
1055 Channel = ElemIdx % 2;
1056 if (ElemIdx == 2) {
1057 PtrIncr = 1;
1058 } else {
1059 PtrIncr = 0;
1060 }
1061 break;
1062 case 4:
1063 Channel = ElemIdx;
1064 PtrIncr = 0;
1065 break;
1066 }
1067}
1068
Tom Stellard75aadc22012-12-11 21:25:42 +00001069SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001070 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001071 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1072 SDValue Chain = Op.getOperand(0);
1073 SDValue Value = Op.getOperand(1);
1074 SDValue Ptr = Op.getOperand(2);
1075
Tom Stellard2ffc3302013-08-26 15:05:44 +00001076 SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
Tom Stellardfbab8272013-08-16 01:12:11 +00001077 if (Result.getNode()) {
1078 return Result;
1079 }
1080
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001081 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
1082 if (StoreNode->isTruncatingStore()) {
1083 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001084 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001085 EVT MemVT = StoreNode->getMemoryVT();
1086 SDValue MaskConstant;
1087 if (MemVT == MVT::i8) {
1088 MaskConstant = DAG.getConstant(0xFF, MVT::i32);
1089 } else {
1090 assert(MemVT == MVT::i16);
1091 MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
1092 }
1093 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
1094 DAG.getConstant(2, MVT::i32));
1095 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
1096 DAG.getConstant(0x00000003, VT));
1097 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1098 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
1099 DAG.getConstant(3, VT));
1100 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1101 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
1102 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1103 // vector instead.
1104 SDValue Src[4] = {
1105 ShiftedValue,
1106 DAG.getConstant(0, MVT::i32),
1107 DAG.getConstant(0, MVT::i32),
1108 Mask
1109 };
Craig Topper48d114b2014-04-26 18:35:24 +00001110 SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001111 SDValue Args[3] = { Chain, Input, DWordAddr };
1112 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
Craig Topper206fcd42014-04-26 19:29:41 +00001113 Op->getVTList(), Args, MemVT,
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001114 StoreNode->getMemOperand());
1115 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
1116 Value.getValueType().bitsGE(MVT::i32)) {
1117 // Convert pointer from byte address to dword address.
1118 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1119 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
1120 Ptr, DAG.getConstant(2, MVT::i32)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001121
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001122 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
Matt Arsenaulteaa3a7e2013-12-10 21:37:42 +00001123 llvm_unreachable("Truncated and indexed stores not supported yet");
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001124 } else {
1125 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1126 }
1127 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001128 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001129 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001130
1131 EVT ValueVT = Value.getValueType();
1132
1133 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1134 return SDValue();
1135 }
1136
Tom Stellarde9373602014-01-22 19:24:14 +00001137 SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
1138 if (Ret.getNode()) {
1139 return Ret;
1140 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001141 // Lowering for indirect addressing
1142
1143 const MachineFunction &MF = DAG.getMachineFunction();
1144 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1145 getTargetMachine().getFrameLowering());
1146 unsigned StackWidth = TFL->getStackWidth(MF);
1147
1148 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1149
1150 if (ValueVT.isVector()) {
1151 unsigned NumElemVT = ValueVT.getVectorNumElements();
1152 EVT ElemVT = ValueVT.getVectorElementType();
Craig Topper48d114b2014-04-26 18:35:24 +00001153 SmallVector<SDValue, 4> Stores(NumElemVT);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001154
1155 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1156 "vector width in load");
1157
1158 for (unsigned i = 0; i < NumElemVT; ++i) {
1159 unsigned Channel, PtrIncr;
1160 getStackAddress(StackWidth, i, Channel, PtrIncr);
1161 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1162 DAG.getConstant(PtrIncr, MVT::i32));
1163 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
1164 Value, DAG.getConstant(i, MVT::i32));
1165
1166 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1167 Chain, Elem, Ptr,
1168 DAG.getTargetConstant(Channel, MVT::i32));
1169 }
Craig Topper48d114b2014-04-26 18:35:24 +00001170 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001171 } else {
1172 if (ValueVT == MVT::i8) {
1173 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1174 }
1175 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
NAKAMURA Takumi18ca09c2013-05-22 06:37:25 +00001176 DAG.getTargetConstant(0, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001177 }
1178
1179 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001180}
1181
Tom Stellard365366f2013-01-23 02:09:06 +00001182// return (512 + (kc_bank << 12)
1183static int
1184ConstantAddressBlock(unsigned AddressSpace) {
1185 switch (AddressSpace) {
1186 case AMDGPUAS::CONSTANT_BUFFER_0:
1187 return 512;
1188 case AMDGPUAS::CONSTANT_BUFFER_1:
1189 return 512 + 4096;
1190 case AMDGPUAS::CONSTANT_BUFFER_2:
1191 return 512 + 4096 * 2;
1192 case AMDGPUAS::CONSTANT_BUFFER_3:
1193 return 512 + 4096 * 3;
1194 case AMDGPUAS::CONSTANT_BUFFER_4:
1195 return 512 + 4096 * 4;
1196 case AMDGPUAS::CONSTANT_BUFFER_5:
1197 return 512 + 4096 * 5;
1198 case AMDGPUAS::CONSTANT_BUFFER_6:
1199 return 512 + 4096 * 6;
1200 case AMDGPUAS::CONSTANT_BUFFER_7:
1201 return 512 + 4096 * 7;
1202 case AMDGPUAS::CONSTANT_BUFFER_8:
1203 return 512 + 4096 * 8;
1204 case AMDGPUAS::CONSTANT_BUFFER_9:
1205 return 512 + 4096 * 9;
1206 case AMDGPUAS::CONSTANT_BUFFER_10:
1207 return 512 + 4096 * 10;
1208 case AMDGPUAS::CONSTANT_BUFFER_11:
1209 return 512 + 4096 * 11;
1210 case AMDGPUAS::CONSTANT_BUFFER_12:
1211 return 512 + 4096 * 12;
1212 case AMDGPUAS::CONSTANT_BUFFER_13:
1213 return 512 + 4096 * 13;
1214 case AMDGPUAS::CONSTANT_BUFFER_14:
1215 return 512 + 4096 * 14;
1216 case AMDGPUAS::CONSTANT_BUFFER_15:
1217 return 512 + 4096 * 15;
1218 default:
1219 return -1;
1220 }
1221}
1222
/// Custom lowering for loads.  Constant-buffer loads become CONST_ADDRESS
/// nodes (folded to an absolute slot when the pointer is constant),
/// unsupported SEXT loads are expanded into EXTLOAD + shift pair, and
/// private (stack) loads become indirect REGISTER_LOAD nodes.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  // Give the common AMDGPU lowering the first chance to handle this load.
  SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
  if (Ret.getNode()) {
    SDValue Ops[2] = {
      Ret,
      Chain
    };
    return DAG.getMergeValues(Ops, DL);
  }


  // Vector loads from local memory are split into scalar loads.
  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      SplitVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
        isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT,
                           makeArrayRef(Slots, NumElements));
    } else {
      // A non-constant ptr can't be folded, so keep it as a v4f32 load.
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    // A scalar load only needs the first element of the fetched vector.
    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
          DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    // Sign-extend by loading zero/any-extended, shifting left so the sign
    // bit lands at the top, then arithmetic-shifting back down.
    SDValue ShiftAmount =
        DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                     LoadNode->getPointerInfo(), MemVT,
                                     LoadNode->isVolatile(),
                                     LoadNode->isNonTemporal(),
                                     LoadNode->getAlignment());
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);

    SDValue MergedValues[2] = { Sra, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Only private (stack) loads get the indirect-addressing lowering below.
  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
      getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    // Load each element through its own REGISTER_LOAD, padding the result
    // out to a 4-element vector with undef lanes.
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, MVT::i32),
                             Op.getOperand(2));
    }
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2] = {
    LoweredLoad,
    Chain
  };

  return DAG.getMergeValues(Ops, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001371
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
/// For compute shaders, arguments are read from the input buffer in constant
/// buffer 0; for other shader types they arrive in live-in registers.
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      SDLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned ShaderType = MF.getInfo<R600MachineFunctionInfo>()->ShaderType;

  SmallVector<ISD::InputArg, 8> LocalIns;

  // Recover the pre-legalization argument types so memory loads use the
  // original (possibly sub-dword) type.
  getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);

  AnalyzeFormalArguments(CCInfo, LocalIns);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    EVT VT = Ins[i].VT;
    EVT MemVT = LocalIns[i].VT;

    // Non-compute shaders receive their inputs in registers.
    if (ShaderType != ShaderType::COMPUTE) {
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
    // being invalid. Somehow this seems to work with i64 arguments, but breaks
    // for <1 x i64>.

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes, so explicit arguments start at offset 36.

    // FIXME: This should really check the extload type, but the handling of
    // extload vector parameters seems to be broken.
    //ISD::LoadExtType Ext = Ins[i].Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
    ISD::LoadExtType Ext = ISD::SEXTLOAD;
    // 4 (the last getExtLoad argument) is the preferred alignment for the
    // CONSTANT memory space.
    SDValue Arg = DAG.getExtLoad(Ext, DL, VT, Chain,
                                 DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
                                 MachinePointerInfo(UndefValue::get(PtrTy)),
                                 MemVT, false, false, 4);

    InVals.push_back(Arg);
  }
  return Chain;
}
1431
Matt Arsenault758659232013-05-18 00:21:46 +00001432EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001433 if (!VT.isVector())
1434 return MVT::i32;
Tom Stellard75aadc22012-12-11 21:25:42 +00001435 return VT.changeVectorElementTypeToInteger();
1436}
1437
/// Compact a BUILD_VECTOR by recording swizzle selects for lanes whose value
/// need not occupy a real slot: undef lanes become SEL_MASK_WRITE, constant
/// 0.0/1.0 lanes become SEL_0/SEL_1, and lanes equal to an earlier lane are
/// remapped onto it.  Replaced lanes are turned into undef in the returned
/// BUILD_VECTOR; \p RemapSwizzle collects lane index -> select code.
static SDValue CompactSwizzlableVector(
  SelectionDAG &DAG, SDValue VectorEntry,
  DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      // We mask write here to teach later passes that the ith element of this
      // vector is undef. Thus we can use it to reduce 128-bit reg usage,
      // break false dependencies and additionally make assembly easier to read.
      RemapSwizzle[i] = 7; // SEL_MASK_WRITE
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    // Lanes already replaced above (or originally undef) take no part in the
    // duplicate scan below.
    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      continue;
    // Remap a lane that duplicates an earlier lane onto that lane's index.
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec);
}
1480
Benjamin Kramer193960c2013-06-11 13:32:25 +00001481static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1482 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001483 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1484 assert(RemapSwizzle.empty());
1485 SDValue NewBldVec[4] = {
1486 VectorEntry.getOperand(0),
1487 VectorEntry.getOperand(1),
1488 VectorEntry.getOperand(2),
1489 VectorEntry.getOperand(3)
1490 };
1491 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001492 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001493 RemapSwizzle[i] = i;
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001494 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1495 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1496 ->getZExtValue();
1497 if (i == Idx)
1498 isUnmovable[Idx] = true;
1499 }
1500 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001501
1502 for (unsigned i = 0; i < 4; i++) {
1503 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1504 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1505 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001506 if (isUnmovable[Idx])
1507 continue;
1508 // Swap i and Idx
1509 std::swap(NewBldVec[Idx], NewBldVec[i]);
1510 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1511 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001512 }
1513 }
1514
1515 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001516 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001517}
1518
1519
1520SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1521SDValue Swz[4], SelectionDAG &DAG) const {
1522 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1523 // Old -> New swizzle values
1524 DenseMap<unsigned, unsigned> SwizzleRemap;
1525
1526 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1527 for (unsigned i = 0; i < 4; i++) {
1528 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1529 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1530 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1531 }
1532
1533 SwizzleRemap.clear();
1534 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1535 for (unsigned i = 0; i < 4; i++) {
1536 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1537 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1538 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1539 }
1540
1541 return BuildVector;
1542}
1543
1544
Tom Stellard75aadc22012-12-11 21:25:42 +00001545//===----------------------------------------------------------------------===//
1546// Custom DAG Optimizations
1547//===----------------------------------------------------------------------===//
1548
/// Target-specific DAG combines for R600; anything not matched here is
/// forwarded to the generic AMDGPU combiner.
SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
  case ISD::FP_ROUND: {
      SDValue Arg = N->getOperand(0);
      if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
        return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
                           Arg.getOperand(0));
      }
      break;
    }

  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
  // (i32 select_cc f32, f32, -1, 0 cc)
  //
  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
  // this to one of the SET*_DX10 instructions.
  case ISD::FP_TO_SINT: {
    SDValue FNeg = N->getOperand(0);
    if (FNeg.getOpcode() != ISD::FNEG) {
      return SDValue();
    }
    SDValue SelectCC = FNeg.getOperand(0);
    if (SelectCC.getOpcode() != ISD::SELECT_CC ||
        SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
        SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
        !isHWTrueValue(SelectCC.getOperand(2)) ||
        !isHWFalseValue(SelectCC.getOperand(3))) {
      return SDValue();
    }

    return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
                           SelectCC.getOperand(0), // LHS
                           SelectCC.getOperand(1), // RHS
                           DAG.getConstant(-1, MVT::i32), // True
                           DAG.getConstant(0, MVT::i32), // False
                           SelectCC.getOperand(4)); // CC

    break;
  }

  // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
  // => build_vector elt0, ... , NewEltIdx, ... , eltN
  case ISD::INSERT_VECTOR_ELT: {
    SDValue InVec = N->getOperand(0);
    SDValue InVal = N->getOperand(1);
    SDValue EltNo = N->getOperand(2);
    SDLoc dl(N);

    // If the inserted element is an UNDEF, just use the input vector.
    if (InVal.getOpcode() == ISD::UNDEF)
      return InVec;

    EVT VT = InVec.getValueType();

    // If we can't generate a legal BUILD_VECTOR, exit
    if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
      return SDValue();

    // Check that we know which element is being inserted
    if (!isa<ConstantSDNode>(EltNo))
      return SDValue();
    unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

    // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
    // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
    // vector elements.
    SmallVector<SDValue, 8> Ops;
    if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
      Ops.append(InVec.getNode()->op_begin(),
                 InVec.getNode()->op_end());
    } else if (InVec.getOpcode() == ISD::UNDEF) {
      unsigned NElts = VT.getVectorNumElements();
      Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
    } else {
      return SDValue();
    }

    // Insert the element
    if (Elt < Ops.size()) {
      // All the operands of BUILD_VECTOR must have the same type;
      // we enforce that here.
      EVT OpVT = Ops[0].getValueType();
      if (InVal.getValueType() != OpVT)
        InVal = OpVT.bitsGT(InVal.getValueType()) ?
          DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
          DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
      Ops[Elt] = InVal;
    }

    // Return the new vector
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
  }

  // Extract_vec (Build_vector) generated by custom lowering
  // also needs to be customly combined
  case ISD::EXTRACT_VECTOR_ELT: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return Arg->getOperand(Element);
      }
    }
    if (Arg.getOpcode() == ISD::BITCAST &&
        Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
            Arg->getOperand(0).getOperand(Element));
      }
    }
  }
  // NOTE(review): there is no break at the end of the EXTRACT_VECTOR_ELT case
  // above, so when neither pattern matched, control falls through into the
  // SELECT_CC case below — whose first action is the generic AMDGPU combine,
  // so the net effect resembles the default case. Confirm whether this
  // fall-through is intentional; if so it deserves an explicit marker.

  case ISD::SELECT_CC: {
    // Try common optimizations
    SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
    if (Ret.getNode())
      return Ret;

    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
    // selectcc x, y, a, b, inv(cc)
    //
    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
    // selectcc x, y, a, b, cc
    SDValue LHS = N->getOperand(0);
    if (LHS.getOpcode() != ISD::SELECT_CC) {
      return SDValue();
    }

    SDValue RHS = N->getOperand(1);
    SDValue True = N->getOperand(2);
    SDValue False = N->getOperand(3);
    ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();

    // The fold only applies when the outer select's true/false values are
    // exactly the inner select's, and the outer comparison is against the
    // false value.
    if (LHS.getOperand(2).getNode() != True.getNode() ||
        LHS.getOperand(3).getNode() != False.getNode() ||
        RHS.getNode() != False.getNode()) {
      return SDValue();
    }

    switch (NCC) {
    default: return SDValue();
    case ISD::SETNE: return LHS;
    case ISD::SETEQ: {
      ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
      LHSCC = ISD::getSetCCInverse(LHSCC,
                                  LHS.getOperand(0).getValueType().isInteger());
      // Only emit the inverted compare if the target can actually select it
      // (or if we are early enough that legalization will handle it).
      if (DCI.isBeforeLegalizeOps() ||
          isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
        return DAG.getSelectCC(SDLoc(N),
                               LHS.getOperand(0),
                               LHS.getOperand(1),
                               LHS.getOperand(2),
                               LHS.getOperand(3),
                               LHSCC);
      break;
    }
    }
    return SDValue();
  }

  case AMDGPUISD::EXPORT: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    SDValue NewArgs[8] = {
      N->getOperand(0), // Chain
      SDValue(),
      N->getOperand(2), // ArrayBase
      N->getOperand(3), // Type
      N->getOperand(4), // SWZ_X
      N->getOperand(5), // SWZ_Y
      N->getOperand(6), // SWZ_Z
      N->getOperand(7) // SWZ_W
    };
    SDLoc DL(N);
    // Rewrites the exported vector and the four SWZ_* operands in tandem.
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
    return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
  }
  case AMDGPUISD::TEXTURE_FETCH: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    SDValue NewArgs[19] = {
      N->getOperand(0),
      N->getOperand(1),
      N->getOperand(2),
      N->getOperand(3),
      N->getOperand(4),
      N->getOperand(5),
      N->getOperand(6),
      N->getOperand(7),
      N->getOperand(8),
      N->getOperand(9),
      N->getOperand(10),
      N->getOperand(11),
      N->getOperand(12),
      N->getOperand(13),
      N->getOperand(14),
      N->getOperand(15),
      N->getOperand(16),
      N->getOperand(17),
      N->getOperand(18),
    };
    // The swizzle selects for a texture fetch start at operand 2.
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
    return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
        NewArgs);
  }
  }
  return SDValue();
}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001767
1768static bool
1769FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001770 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001771 const R600InstrInfo *TII =
1772 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
1773 if (!Src.isMachineOpcode())
1774 return false;
1775 switch (Src.getMachineOpcode()) {
1776 case AMDGPU::FNEG_R600:
1777 if (!Neg.getNode())
1778 return false;
1779 Src = Src.getOperand(0);
1780 Neg = DAG.getTargetConstant(1, MVT::i32);
1781 return true;
1782 case AMDGPU::FABS_R600:
1783 if (!Abs.getNode())
1784 return false;
1785 Src = Src.getOperand(0);
1786 Abs = DAG.getTargetConstant(1, MVT::i32);
1787 return true;
1788 case AMDGPU::CONST_COPY: {
1789 unsigned Opcode = ParentNode->getMachineOpcode();
1790 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1791
1792 if (!Sel.getNode())
1793 return false;
1794
1795 SDValue CstOffset = Src.getOperand(0);
1796 if (ParentNode->getValueType(0).isVector())
1797 return false;
1798
1799 // Gather constants values
1800 int SrcIndices[] = {
1801 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
1802 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
1803 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
1804 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1805 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1806 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1807 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
1808 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
1809 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
1810 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
1811 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
1812 };
1813 std::vector<unsigned> Consts;
1814 for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
1815 int OtherSrcIdx = SrcIndices[i];
1816 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
1817 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
1818 continue;
1819 if (HasDst) {
1820 OtherSrcIdx--;
1821 OtherSelIdx--;
1822 }
1823 if (RegisterSDNode *Reg =
1824 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
1825 if (Reg->getReg() == AMDGPU::ALU_CONST) {
1826 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(
1827 ParentNode->getOperand(OtherSelIdx));
1828 Consts.push_back(Cst->getZExtValue());
1829 }
1830 }
1831 }
1832
1833 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
1834 Consts.push_back(Cst->getZExtValue());
1835 if (!TII->fitsConstReadLimitations(Consts)) {
1836 return false;
1837 }
1838
1839 Sel = CstOffset;
1840 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
1841 return true;
1842 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001843 case AMDGPU::MOV_IMM_I32:
1844 case AMDGPU::MOV_IMM_F32: {
1845 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
1846 uint64_t ImmValue = 0;
1847
1848
1849 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
1850 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
1851 float FloatValue = FPC->getValueAPF().convertToFloat();
1852 if (FloatValue == 0.0) {
1853 ImmReg = AMDGPU::ZERO;
1854 } else if (FloatValue == 0.5) {
1855 ImmReg = AMDGPU::HALF;
1856 } else if (FloatValue == 1.0) {
1857 ImmReg = AMDGPU::ONE;
1858 } else {
1859 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
1860 }
1861 } else {
1862 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
1863 uint64_t Value = C->getZExtValue();
1864 if (Value == 0) {
1865 ImmReg = AMDGPU::ZERO;
1866 } else if (Value == 1) {
1867 ImmReg = AMDGPU::ONE_INT;
1868 } else {
1869 ImmValue = Value;
1870 }
1871 }
1872
1873 // Check that we aren't already using an immediate.
1874 // XXX: It's possible for an instruction to have more than one
1875 // immediate operand, but this is not supported yet.
1876 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
1877 if (!Imm.getNode())
1878 return false;
1879 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
1880 assert(C);
1881 if (C->getZExtValue())
1882 return false;
1883 Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
1884 }
1885 Src = DAG.getRegister(ImmReg, MVT::i32);
1886 return true;
1887 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001888 default:
1889 return false;
1890 }
1891}
1892
1893
/// \brief Fold the instructions after selecting them
///
/// Walks the operands of the selected machine node and uses FoldOperand to
/// merge FNEG/FABS/CONST_COPY/MOV_IMM producers into the node's source
/// modifier, constant-select, and literal operand slots. Three shapes are
/// handled specially (DOT_4, REG_SEQUENCE, CLAMP_R600); any other opcode with
/// instruction modifiers goes through the generic three-source path.
/// Returns a re-created machine node on the first successful fold, or the
/// original \p Node unchanged.
SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
                                            SelectionDAG &DAG) const {
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
  if (!Node->isMachineOpcode())
    return Node;
  unsigned Opcode = Node->getMachineOpcode();
  SDValue FakeOp;

  // Mutable copy of the operand list; FoldOperand edits entries in place.
  std::vector<SDValue> Ops;
  for(SDNode::op_iterator I = Node->op_begin(), E = Node->op_end();
              I != E; ++I)
          Ops.push_back(*I);

  if (Opcode == AMDGPU::DOT_4) {
    // DOT_4 has eight scalar sources (src0/src1 x XYZW), each with its own
    // neg/abs modifier operand.
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
        };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
    };
    for (unsigned i = 0; i < 8; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      // The "- 1" compensates for the dst operand, which is present in the
      // MC operand indices but absent from the SDNode operand list.
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue &Abs = Ops[AbsIdx[i] - 1];
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      if (HasDst)
        SelIdx--;
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::REG_SEQUENCE) {
    // Operands alternate (value, subreg-index); only the values are foldable.
    for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
      SDValue &Src = Ops[i];
      if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::CLAMP_R600) {
    // Fold the clamp into the defining instruction's clamp modifier bit.
    SDValue Src = Node->getOperand(0);
    if (!Src.isMachineOpcode() ||
        !TII->hasInstrModifiers(Src.getMachineOpcode()))
      return Node;
    int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
        AMDGPU::OpName::clamp);
    if (ClampIdx < 0)
      return Node;
    // NOTE: this inner Ops deliberately shadows the outer one — it holds the
    // operand list of Src (the node being re-created), not of Node.
    std::vector<SDValue> Ops;
    unsigned NumOp = Src.getNumOperands();
    for(unsigned i = 0; i < NumOp; ++i)
          Ops.push_back(Src.getOperand(i));
    Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
    return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
        Node->getVTList(), Ops);
  } else {
    // Generic path: up to three sources, each with neg (and, except src2,
    // abs) modifiers plus an optional shared literal slot.
    if (!TII->hasInstrModifiers(Opcode))
      return Node;
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
      -1
    };
    for (unsigned i = 0; i < 3; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue FakeAbs;
      SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
      if (HasDst) {
        SelIdx--;
        ImmIdx--;
      }
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      SDValue &Imm = Ops[ImmIdx];
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  }

  return Node;
}