//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Function.h"

using namespace llvm;

R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
  addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);

  computeRegisterProperties();

  // Set condition code actions
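  // (Expand here means the generic DAG legalizer rewrites an unsupported
  // condition code in terms of the ones left legal, e.g. by swapping the
  // operands or inverting the comparison.)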
  setCondCodeAction(ISD::SETO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::f32, Expand);

  setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::i32, Expand);

  setOperationAction(ISD::FCOS, MVT::f32, Custom);
  setOperationAction(ISD::FSIN, MVT::f32, Custom);

  setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
  setOperationAction(ISD::SETCC, MVT::v2i32, Expand);

  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);

  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
  setOperationAction(ISD::SELECT, MVT::v2f32, Expand);
  setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
  setOperationAction(ISD::SELECT, MVT::v4f32, Expand);

  // Expand sign extension of vectors
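  // (Scalar SIGN_EXTEND_INREG of i1/i8/i16 can be selected as a BFE
  // bit-field-extract instruction on subtargets that have one; everything
  // marked Expand below is legalized to a shift pair instead.)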
  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);

  // Legalize loads and stores to the private address space.
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);

  // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
  // spaces, so it is custom lowered to handle those where it isn't.
  setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
  setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
  setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);

  setOperationAction(ISD::STORE, MVT::i8, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);
  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
  setTruncStoreAction(MVT::i32, MVT::i16, Custom);

  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);

  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::FP_TO_SINT);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::SELECT_CC);
  setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);

  setBooleanContents(ZeroOrNegativeOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
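  // Sched::Source keeps the pre-RA scheduler close to source order; the
  // R600-specific clause formation and packetizing passes do the real
  // latency-oriented grouping later.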
  setSchedulingPreference(Sched::Source);
}

MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // Replace LDS_*_RET instructions that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI->getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
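      // If the destination register is still read somewhere, the _RET form
      // must be kept, so leave the instruction untouched.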
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()))
        return BB;

      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
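  // CLAMP, FABS and FNEG have no dedicated R600 instructions; each is
  // emitted as a plain MOV carrying the corresponding result/source
  // modifier flag.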
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
        MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    unsigned EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
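    // TXD (sample with explicit derivatives) expands to three TEX ops: the
    // horizontal and vertical gradients are loaded with
    // TEX_SET_GRADIENTS_H/V, then TEX_SAMPLE_G performs the fetch using
    // them as implicit inputs.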
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }
344 case AMDGPU::TXD_SHADOW: {
345 unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
346 unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000347 MachineOperand &RID = MI->getOperand(4);
348 MachineOperand &SID = MI->getOperand(5);
349 unsigned TextureId = MI->getOperand(6).getImm();
350 unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
351 unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
352
353 switch (TextureId) {
354 case 5: // Rect
355 CTX = CTY = 0;
356 break;
357 case 6: // Shadow1D
358 SrcW = SrcZ;
359 break;
360 case 7: // Shadow2D
361 SrcW = SrcZ;
362 break;
363 case 8: // ShadowRect
364 CTX = CTY = 0;
365 SrcW = SrcZ;
366 break;
367 case 9: // 1DArray
368 SrcZ = SrcY;
369 CTZ = 0;
370 break;
371 case 10: // 2DArray
372 CTZ = 0;
373 break;
374 case 11: // Shadow1DArray
375 SrcZ = SrcY;
376 CTZ = 0;
377 break;
378 case 12: // Shadow2DArray
379 CTZ = 0;
380 break;
381 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000382
383 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
384 .addOperand(MI->getOperand(3))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000385 .addImm(SrcX)
386 .addImm(SrcY)
387 .addImm(SrcZ)
388 .addImm(SrcW)
389 .addImm(0)
390 .addImm(0)
391 .addImm(0)
392 .addImm(0)
393 .addImm(1)
394 .addImm(2)
395 .addImm(3)
396 .addOperand(RID)
397 .addOperand(SID)
398 .addImm(CTX)
399 .addImm(CTY)
400 .addImm(CTZ)
401 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000402 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
403 .addOperand(MI->getOperand(2))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000404 .addImm(SrcX)
405 .addImm(SrcY)
406 .addImm(SrcZ)
407 .addImm(SrcW)
408 .addImm(0)
409 .addImm(0)
410 .addImm(0)
411 .addImm(0)
412 .addImm(1)
413 .addImm(2)
414 .addImm(3)
415 .addOperand(RID)
416 .addOperand(SID)
417 .addImm(CTX)
418 .addImm(CTY)
419 .addImm(CTZ)
420 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000421 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
422 .addOperand(MI->getOperand(0))
423 .addOperand(MI->getOperand(1))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000424 .addImm(SrcX)
425 .addImm(SrcY)
426 .addImm(SrcZ)
427 .addImm(SrcW)
428 .addImm(0)
429 .addImm(0)
430 .addImm(0)
431 .addImm(0)
432 .addImm(1)
433 .addImm(2)
434 .addImm(3)
435 .addOperand(RID)
436 .addOperand(SID)
437 .addImm(CTX)
438 .addImm(CTY)
439 .addImm(CTZ)
440 .addImm(CTW)
Tom Stellard75aadc22012-12-11 21:25:42 +0000441 .addReg(T0, RegState::Implicit)
442 .addReg(T1, RegState::Implicit);
443 break;
444 }
445
  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO_INT)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if it's not the last one of its type.
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
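    // 84 (Evergreen) and 40 (R600) appear to be the hardware CF_INST
    // encodings of the EXPORT_DONE form; only the final export of each type
    // (or an end-of-program export) is rewritten with it.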
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz) ? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  MI->eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::FCOS:
  case ISD::FSIN: return LowerTrig(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: return LowerLOAD(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MFI->LiveOuts.push_back(Reg);
      return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
    }
    case AMDGPUIntrinsic::R600_store_swizzle: {
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, MVT::i32), // SWZ_X
        DAG.getConstant(1, MVT::i32), // SWZ_Y
        DAG.getConstant(2, MVT::i32), // SWZ_Z
        DAG.getConstant(3, MVT::i32)  // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(),
                         Args, 8);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch (IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      MRI.addLiveIn(Reg);
      return DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), Reg, VT);
    }

    case AMDGPUIntrinsic::R600_interp_input: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
      MachineSDNode *interp;
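      // A negative i/j-buffer index appears to mean the input is not
      // interpolated (constant/flat), so it is fetched with INTERP_VEC_LOAD
      // rather than the barycentric INTERP_PAIR_* instructions.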
      if (ijb < 0) {
        const MachineFunction &MF = DAG.getMachineFunction();
        const R600InstrInfo *TII =
            static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
        interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
            MVT::v4f32, DAG.getTargetConstant(slot / 4, MVT::i32));
        return DAG.getTargetExtractSubreg(
            TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
            DL, MVT::f32, SDValue(interp, 0));
      }
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
      unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
      MRI.addLiveIn(RegisterI);
      MRI.addLiveIn(RegisterJ);
      SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
      SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);

      if (slot % 4 < 2)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4, MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4, MVT::i32),
            RegisterJNode, RegisterINode);
      return SDValue(interp, slot % 2);
    }
    case AMDGPUIntrinsic::R600_interp_xy:
    case AMDGPUIntrinsic::R600_interp_zw: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      MachineSDNode *interp;
      SDValue RegisterINode = Op.getOperand(2);
      SDValue RegisterJNode = Op.getOperand(3);

      if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
            RegisterJNode, RegisterINode);
      return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
          SDValue(interp, 0), SDValue(interp, 1));
    }
    case AMDGPUIntrinsic::R600_tex:
    case AMDGPUIntrinsic::R600_texc:
    case AMDGPUIntrinsic::R600_txl:
    case AMDGPUIntrinsic::R600_txlc:
    case AMDGPUIntrinsic::R600_txb:
    case AMDGPUIntrinsic::R600_txbc:
    case AMDGPUIntrinsic::R600_txf:
    case AMDGPUIntrinsic::R600_txq:
    case AMDGPUIntrinsic::R600_ddx:
    case AMDGPUIntrinsic::R600_ddy:
    case AMDGPUIntrinsic::R600_ldptr: {
      unsigned TextureOp;
      switch (IntrinsicID) {
      case AMDGPUIntrinsic::R600_tex:
        TextureOp = 0;
        break;
      case AMDGPUIntrinsic::R600_texc:
        TextureOp = 1;
        break;
      case AMDGPUIntrinsic::R600_txl:
        TextureOp = 2;
        break;
      case AMDGPUIntrinsic::R600_txlc:
        TextureOp = 3;
        break;
      case AMDGPUIntrinsic::R600_txb:
        TextureOp = 4;
        break;
      case AMDGPUIntrinsic::R600_txbc:
        TextureOp = 5;
        break;
      case AMDGPUIntrinsic::R600_txf:
        TextureOp = 6;
        break;
      case AMDGPUIntrinsic::R600_txq:
        TextureOp = 7;
        break;
      case AMDGPUIntrinsic::R600_ddx:
        TextureOp = 8;
        break;
      case AMDGPUIntrinsic::R600_ddy:
        TextureOp = 9;
        break;
      case AMDGPUIntrinsic::R600_ldptr:
        TextureOp = 10;
        break;
      default:
        llvm_unreachable("Unknown Texture Operation");
      }

      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
    }
    case AMDGPUIntrinsic::AMDGPU_dp4: {
      SDValue Args[8] = {
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(0, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(0, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(1, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(1, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(2, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(2, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(3, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(3, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8);
    }

    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
    return;
  case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
    return;
  case ISD::LOAD: {
    SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
    Results.push_back(SDValue(Node, 0));
    Results.push_back(SDValue(Node, 1));
    // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
    // function
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(Node, 1));
    return;
  }
  case ISD::STORE:
    SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
    Results.push_back(SDValue(Node, 0));
    return;
  }
}

SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
  // On hw >= R700, COS/SIN input must be between -1. and 1.
  // Thus we lower them to TRIG(FRACT(x / (2 * Pi) + 0.5) - 0.5)
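  // E.g. for x = 5*Pi/2: FRACT(1.25 + 0.5) - 0.5 = 0.75 - 0.5 = 0.25,
  // i.e. the angle reduced to a quarter turn, as expected since
  // 5*Pi/2 == Pi/2 modulo a full turn.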
  EVT VT = Op.getValueType();
  SDValue Arg = Op.getOperand(0);
  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
      DAG.getNode(ISD::FADD, SDLoc(Op), VT,
          DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
              DAG.getConstantFP(0.15915494309, MVT::f32)), // 1 / (2 * Pi)
          DAG.getConstantFP(0.5, MVT::f32)));
  unsigned TrigNode;
  switch (Op.getOpcode()) {
  case ISD::FCOS:
    TrigNode = AMDGPUISD::COS_HW;
    break;
  case ISD::FSIN:
    TrigNode = AMDGPUISD::SIN_HW;
    break;
  default:
    llvm_unreachable("Wrong trig opcode");
  }
  SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
      DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
          DAG.getConstantFP(-0.5, MVT::f32)));
  if (Gen >= AMDGPUSubtarget::R700)
    return TrigVal;
  // On R600 hw, COS/SIN input must be between -Pi and Pi.
  return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
      DAG.getConstantFP(3.14159265359, MVT::f32));
}

SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
  return DAG.getNode(
      ISD::SETCC,
      SDLoc(Op),
      MVT::i1,
      Op, DAG.getConstantFP(0.0f, MVT::f32),
      DAG.getCondCode(ISD::SETNE)
      );
}

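// The implicit kernel parameters (ngroups, global and local sizes) are laid
// out as consecutive dwords at the start of CONSTANT_BUFFER_0; DwordOffset
// selects which parameter to load.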
SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   SDLoc DL,
                                                   unsigned DwordOffset) const {
  unsigned ByteOffset = DwordOffset * 4;
  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                           AMDGPUAS::CONSTANT_BUFFER_0);

  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)),
                     false, false, false, 0);
}

bool R600TargetLowering::isZero(SDValue Op) const {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
    return Cst->isNullValue();
  } else if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CstFP->isZero();
  } else {
    return false;
  }
}

SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
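  // E.g. select_cc x, y, 0, -1, seteq has its hardware values swapped and
  // becomes select_cc x, y, -1, 0, setne, which a SET* can match directly.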
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
      ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // Possible Min/Max pattern
  SDValue MinMax = LowerMinMax(Op, DAG);
  if (MinMax.getNode()) {
    return MinMax;
  }

  // If we make it this far, it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  } else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS,
                             HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}

/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
/// convert these pointers to a register index. Each register holds
/// 16 bytes (4 x 32-bit sub-registers), but we need to take into account the
/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
/// for indirect addressing.
SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
                                               unsigned StackWidth,
                                               SelectionDAG &DAG) const {
  unsigned SRLPad;
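  // Each stack "row" occupies StackWidth 32-bit channels, i.e.
  // StackWidth * 4 bytes, so the register index is the byte address shifted
  // right by log2(StackWidth * 4).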
  switch (StackWidth) {
  case 1:
    SRLPad = 2;
    break;
  case 2:
    SRLPad = 3;
    break;
  case 4:
    SRLPad = 4;
    break;
  default: llvm_unreachable("Invalid stack width");
  }

  return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
                     DAG.getConstant(SRLPad, MVT::i32));
}

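// Maps element ElemIdx of a spilled value to a channel within a row plus a
// row increment. E.g. with StackWidth == 2 a four-element vector occupies
// two rows: elements 0 and 1 land in channels 0 and 1 of row N, elements 2
// and 3 in channels 0 and 1 of row N + 1 (the caller accumulates PtrIncr).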
void R600TargetLowering::getStackAddress(unsigned StackWidth,
                                         unsigned ElemIdx,
                                         unsigned &Channel,
                                         unsigned &PtrIncr) const {
  switch (StackWidth) {
  default:
  case 1:
    Channel = 0;
    if (ElemIdx > 0) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 2:
    Channel = ElemIdx % 2;
    if (ElemIdx == 2) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 4:
    Channel = ElemIdx;
    PtrIncr = 0;
    break;
  }
}

SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Ptr = Op.getOperand(2);

  SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
  if (Result.getNode()) {
    return Result;
  }

  if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
    if (StoreNode->isTruncatingStore()) {
      EVT VT = Value.getValueType();
      assert(VT.bitsLE(MVT::i32));
      EVT MemVT = StoreNode->getMemoryVT();
      SDValue MaskConstant;
      if (MemVT == MVT::i8) {
        MaskConstant = DAG.getConstant(0xFF, MVT::i32);
      } else {
        assert(MemVT == MVT::i16);
        MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
      }
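      // A sub-dword global store becomes a MSKOR (mask-or) on the containing
      // dword: the truncated value and its mask are shifted to the byte
      // offset within that dword and merged by the RAT op. E.g. storing 0xAB
      // as i8 at byte address 6: DWordAddr = 1, ByteIndex = 2, Shift = 16,
      // ShiftedValue = 0xAB0000, Mask = 0xFF0000.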
      SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
                                      DAG.getConstant(2, MVT::i32));
      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
                                      DAG.getConstant(0x00000003, VT));
      SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
      SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
                                  DAG.getConstant(3, VT));
      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
      // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
      // vector instead.
      SDValue Src[4] = {
        ShiftedValue,
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(0, MVT::i32),
        Mask
      };
      SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src, 4);
      SDValue Args[3] = { Chain, Input, DWordAddr };
      return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
                                     Op->getVTList(), Args, 3, MemVT,
                                     StoreNode->getMemOperand());
    } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
               Value.getValueType().bitsGE(MVT::i32)) {
      // Convert pointer from byte address to dword address.
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
                        DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
                                    Ptr, DAG.getConstant(2, MVT::i32)));

      if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
        llvm_unreachable("Truncated and indexed stores not supported yet");
      } else {
        Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
      }
      return Chain;
    }
  }

  EVT ValueVT = Value.getValueType();

  if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
  if (Ret.getNode()) {
    return Ret;
  }
  // Lowering for indirect addressing

  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
      getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (ValueVT.isVector()) {
    unsigned NumElemVT = ValueVT.getVectorNumElements();
    EVT ElemVT = ValueVT.getVectorElementType();
    SDValue Stores[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in store");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
                                 Value, DAG.getConstant(i, MVT::i32));

      Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
                              Chain, Elem, Ptr,
                              DAG.getTargetConstant(Channel, MVT::i32));
    }
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
  } else {
    if (ValueVT == MVT::i8) {
      Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
    }
    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
                        DAG.getTargetConstant(0, MVT::i32)); // Channel
  }

  return Chain;
}

// Returns 512 + (kc_bank << 12), the base dword position of the given
// constant buffer, or -1 for a non-constant address space.
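// Each kcache bank spans 4096 dwords and the banks start at dword 512, so
// e.g. CONSTANT_BUFFER_2 starts at 512 + 4096 * 2 = 8704.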
static int
ConstantAddressBlock(unsigned AddressSpace) {
  switch (AddressSpace) {
  case AMDGPUAS::CONSTANT_BUFFER_0:
    return 512;
  case AMDGPUAS::CONSTANT_BUFFER_1:
    return 512 + 4096;
  case AMDGPUAS::CONSTANT_BUFFER_2:
    return 512 + 4096 * 2;
  case AMDGPUAS::CONSTANT_BUFFER_3:
    return 512 + 4096 * 3;
  case AMDGPUAS::CONSTANT_BUFFER_4:
    return 512 + 4096 * 4;
  case AMDGPUAS::CONSTANT_BUFFER_5:
    return 512 + 4096 * 5;
  case AMDGPUAS::CONSTANT_BUFFER_6:
    return 512 + 4096 * 6;
  case AMDGPUAS::CONSTANT_BUFFER_7:
    return 512 + 4096 * 7;
  case AMDGPUAS::CONSTANT_BUFFER_8:
    return 512 + 4096 * 8;
  case AMDGPUAS::CONSTANT_BUFFER_9:
    return 512 + 4096 * 9;
  case AMDGPUAS::CONSTANT_BUFFER_10:
    return 512 + 4096 * 10;
  case AMDGPUAS::CONSTANT_BUFFER_11:
    return 512 + 4096 * 11;
  case AMDGPUAS::CONSTANT_BUFFER_12:
    return 512 + 4096 * 12;
  case AMDGPUAS::CONSTANT_BUFFER_13:
    return 512 + 4096 * 13;
  case AMDGPUAS::CONSTANT_BUFFER_14:
    return 512 + 4096 * 14;
  case AMDGPUAS::CONSTANT_BUFFER_15:
    return 512 + 4096 * 15;
  default:
    return -1;
  }
}

SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
  if (Ret.getNode()) {
    SDValue Ops[2] = {
      Ret,
      Chain
    };
    return DAG.getMergeValues(Ops, 2, DL);
  }

  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      SplitVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
        isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula:
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (512 + (kc_bank << 12)) * 16 + chan * 4 here and
        // then div by 4 at the ISel step
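        // E.g. dword 1 (chan 1) of the 16-byte constant at byte address 32
        // in CONSTANT_BUFFER_0: NewPtr = 32 + 4 * 1 + 512 * 16 = 8228, and
        // 8228 / 4 = 2057 = ((512 + 2) << 2) + 1.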
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT, Slots, NumElements);
    } else {
      // A non-constant pointer can't be folded; keep it as a v4f32 load.
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
          DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
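  // The expansion is the classic extending load followed by shl/sra: e.g.
  // for an i8 value, shift left by 24 then arithmetic-shift right by 24 to
  // replicate the sign bit across the upper bits.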
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    SDValue ShiftAmount =
        DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                     LoadNode->getPointerInfo(), MemVT,
                                     LoadNode->isVolatile(),
                                     LoadNode->isNonTemporal(),
                                     LoadNode->getAlignment());
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);

    SDValue MergedValues[2] = { Sra, Chain };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001326 if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1327 return SDValue();
1328 }
1329
1330 // Lowering for indirect addressing
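  // Private-address loads become AMDGPUISD::REGISTER_LOAD nodes: the byte
  // pointer is first converted into a register index (stackPtrToRegIndex
  // below), and vector loads are split into one REGISTER_LOAD per element,
  // padded with undef up to four channels.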
1331 const MachineFunction &MF = DAG.getMachineFunction();
1332 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1333 getTargetMachine().getFrameLowering());
1334 unsigned StackWidth = TFL->getStackWidth(MF);
1335
1336 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1337
1338 if (VT.isVector()) {
1339 unsigned NumElemVT = VT.getVectorNumElements();
1340 EVT ElemVT = VT.getVectorElementType();
1341 SDValue Loads[4];
1342
1343 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1344 "vector width in load");
1345
1346 for (unsigned i = 0; i < NumElemVT; ++i) {
1347 unsigned Channel, PtrIncr;
1348 getStackAddress(StackWidth, i, Channel, PtrIncr);
1349 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1350 DAG.getConstant(PtrIncr, MVT::i32));
1351 Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
1352 Chain, Ptr,
1353 DAG.getTargetConstant(Channel, MVT::i32),
1354 Op.getOperand(2));
1355 }
1356 for (unsigned i = NumElemVT; i < 4; ++i) {
1357 Loads[i] = DAG.getUNDEF(ElemVT);
1358 }
1359 EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
1360 LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
1361 } else {
1362 LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
1363 Chain, Ptr,
1364 DAG.getTargetConstant(0, MVT::i32), // Channel
1365 Op.getOperand(2));
1366 }
1367
Matt Arsenault7939acd2014-04-07 16:44:24 +00001368 SDValue Ops[2] = {
1369 LoweredLoad,
1370 Chain
1371 };
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001372
1373 return DAG.getMergeValues(Ops, 2, DL);
Tom Stellard365366f2013-01-23 02:09:06 +00001374}
Tom Stellard75aadc22012-12-11 21:25:42 +00001375
Tom Stellard75aadc22012-12-11 21:25:42 +00001376/// XXX Only kernel functions are supported, so we can assume for now that
1377/// every function is a kernel function, but in the future we should use
1378/// separate calling conventions for kernel and non-kernel functions.
1379SDValue R600TargetLowering::LowerFormalArguments(
1380 SDValue Chain,
1381 CallingConv::ID CallConv,
1382 bool isVarArg,
1383 const SmallVectorImpl<ISD::InputArg> &Ins,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001384 SDLoc DL, SelectionDAG &DAG,
Tom Stellard75aadc22012-12-11 21:25:42 +00001385 SmallVectorImpl<SDValue> &InVals) const {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001386 SmallVector<CCValAssign, 16> ArgLocs;
1387 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
1388 getTargetMachine(), ArgLocs, *DAG.getContext());
Vincent Lejeunef143af32013-11-11 22:10:24 +00001389 MachineFunction &MF = DAG.getMachineFunction();
1390 unsigned ShaderType = MF.getInfo<R600MachineFunctionInfo>()->ShaderType;
Tom Stellardacfeebf2013-07-23 01:48:05 +00001391
Tom Stellardaf775432013-10-23 00:44:32 +00001392 SmallVector<ISD::InputArg, 8> LocalIns;
1393
Matt Arsenault209a7b92014-04-18 07:40:20 +00001394 getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);
Tom Stellardaf775432013-10-23 00:44:32 +00001395
1396 AnalyzeFormalArguments(CCInfo, LocalIns);
Tom Stellardacfeebf2013-07-23 01:48:05 +00001397
Tom Stellard1e803092013-07-23 01:48:18 +00001398 for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001399 CCValAssign &VA = ArgLocs[i];
Tom Stellardaf775432013-10-23 00:44:32 +00001400 EVT VT = Ins[i].VT;
1401 EVT MemVT = LocalIns[i].VT;
Tom Stellard78e01292013-07-23 01:47:58 +00001402
Vincent Lejeunef143af32013-11-11 22:10:24 +00001403 if (ShaderType != ShaderType::COMPUTE) {
1404 unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
1405 SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
1406 InVals.push_back(Register);
1407 continue;
1408 }
1409
Tom Stellard75aadc22012-12-11 21:25:42 +00001410 PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +00001411 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellardacfeebf2013-07-23 01:48:05 +00001412
Matt Arsenaultfae02982014-03-17 18:58:11 +00001413    // i64 isn't a legal type, so the register type used ends up as i32, which
1414    // isn't expected here. The lowering then attempts to create an invalid
1415    // sextload. Somehow this seems to work with i64 arguments, but it breaks
1416    // for <1 x i64>.
1417
Tom Stellardacfeebf2013-07-23 01:48:05 +00001418    // The first 36 bytes of the input buffer contain information about
1419 // thread group and global sizes.
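    // Consequently (as the offset computation below assumes), the first user
    // argument starts at byte offset 36 in CONSTANT_BUFFER_0.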
Matt Arsenaulte1f030c2014-04-11 20:59:54 +00001420
1421 // FIXME: This should really check the extload type, but the handling of
1422    // extload vector parameters seems to be broken.
1423 //ISD::LoadExtType Ext = Ins[i].Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
1424 ISD::LoadExtType Ext = ISD::SEXTLOAD;
1425 SDValue Arg = DAG.getExtLoad(Ext, DL, VT, Chain,
Tom Stellardaf775432013-10-23 00:44:32 +00001426 DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
1427 MachinePointerInfo(UndefValue::get(PtrTy)),
1428 MemVT, false, false, 4);
Matt Arsenault209a7b92014-04-18 07:40:20 +00001429
1430 // 4 is the preferred alignment for the CONSTANT memory space.
Tom Stellard75aadc22012-12-11 21:25:42 +00001431 InVals.push_back(Arg);
Tom Stellard75aadc22012-12-11 21:25:42 +00001432 }
1433 return Chain;
1434}
1435
Matt Arsenault758659232013-05-18 00:21:46 +00001436EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001437 if (!VT.isVector())
1438 return MVT::i32;
Tom Stellard75aadc22012-12-11 21:25:42 +00001439 return VT.changeVectorElementTypeToInteger();
1440}
1441
Matt Arsenault209a7b92014-04-18 07:40:20 +00001442static SDValue CompactSwizzlableVector(
1443 SelectionDAG &DAG, SDValue VectorEntry,
1444 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001445 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1446 assert(RemapSwizzle.empty());
1447 SDValue NewBldVec[4] = {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001448 VectorEntry.getOperand(0),
1449 VectorEntry.getOperand(1),
1450 VectorEntry.getOperand(2),
1451 VectorEntry.getOperand(3)
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001452 };
1453
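  // Swizzle selector legend for the RemapSwizzle values written below
  // (per the R600 ISA encoding): 0-3 select components X..W, 4 is SEL_0,
  // 5 is SEL_1 and 7 is SEL_MASK_WRITE.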
1454 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeunefa58a5f2013-10-13 17:56:10 +00001455 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1456      // We mask the write here to teach later passes that the ith element
1457      // of this vector is undef, which reduces 128-bit register usage,
1458      // breaks false dependencies and additionally makes assembly easier to read.
1459 RemapSwizzle[i] = 7; // SEL_MASK_WRITE
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001460 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1461 if (C->isZero()) {
1462 RemapSwizzle[i] = 4; // SEL_0
1463 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1464 } else if (C->isExactlyValue(1.0)) {
1465 RemapSwizzle[i] = 5; // SEL_1
1466 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1467 }
1468 }
1469
1470 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1471 continue;
1472 for (unsigned j = 0; j < i; j++) {
1473 if (NewBldVec[i] == NewBldVec[j]) {
1474 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1475 RemapSwizzle[i] = j;
1476 break;
1477 }
1478 }
1479 }
1480
1481 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Matt Arsenault209a7b92014-04-18 07:40:20 +00001482 VectorEntry.getValueType(), NewBldVec, 4);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001483}
1484
Benjamin Kramer193960c2013-06-11 13:32:25 +00001485static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1486 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001487 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1488 assert(RemapSwizzle.empty());
1489 SDValue NewBldVec[4] = {
1490 VectorEntry.getOperand(0),
1491 VectorEntry.getOperand(1),
1492 VectorEntry.getOperand(2),
1493 VectorEntry.getOperand(3)
1494 };
1495 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001496 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001497 RemapSwizzle[i] = i;
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001498 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1499      unsigned Idx = cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1500          ->getZExtValue();
1501 if (i == Idx)
1502 isUnmovable[Idx] = true;
1503 }
1504 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001505
1506 for (unsigned i = 0; i < 4; i++) {
1507 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1508      unsigned Idx = cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1509          ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001510 if (isUnmovable[Idx])
1511 continue;
1512 // Swap i and Idx
1513 std::swap(NewBldVec[Idx], NewBldVec[i]);
1514 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1515 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001516 }
1517 }
1518
1519 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
1520 VectorEntry.getValueType(), NewBldVec, 4);
1521}
1522
1524SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[4],
1525                                            SelectionDAG &DAG) const {
1526 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1527 // Old -> New swizzle values
1528 DenseMap<unsigned, unsigned> SwizzleRemap;
1529
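  // Two remapping passes run here: CompactSwizzlableVector folds undef,
  // constant and duplicate elements into swizzle selectors, then
  // ReorganizeVector moves extract_vector_elt operands toward their source
  // lane; after each pass the Swz operands are rewritten through the map.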
1530 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1531 for (unsigned i = 0; i < 4; i++) {
1532    unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
1533 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1534 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1535 }
1536
1537 SwizzleRemap.clear();
1538 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1539 for (unsigned i = 0; i < 4; i++) {
1540    unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
1541 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1542 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1543 }
1544
1545 return BuildVector;
1546}
1547
Tom Stellard75aadc22012-12-11 21:25:42 +00001549//===----------------------------------------------------------------------===//
1550// Custom DAG Optimizations
1551//===----------------------------------------------------------------------===//
1552
1553SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1554 DAGCombinerInfo &DCI) const {
1555 SelectionDAG &DAG = DCI.DAG;
1556
1557 switch (N->getOpcode()) {
Tom Stellard50122a52014-04-07 19:45:41 +00001558 default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00001559 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1560 case ISD::FP_ROUND: {
1561 SDValue Arg = N->getOperand(0);
1562 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001563 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001564 Arg.getOperand(0));
1565 }
1566 break;
1567 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001568
1569 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1570 // (i32 select_cc f32, f32, -1, 0 cc)
1571 //
1572 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1573 // this to one of the SET*_DX10 instructions.
1574 case ISD::FP_TO_SINT: {
1575 SDValue FNeg = N->getOperand(0);
1576 if (FNeg.getOpcode() != ISD::FNEG) {
1577 return SDValue();
1578 }
1579 SDValue SelectCC = FNeg.getOperand(0);
1580 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1581 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1582 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1583 !isHWTrueValue(SelectCC.getOperand(2)) ||
1584 !isHWFalseValue(SelectCC.getOperand(3))) {
1585 return SDValue();
1586 }
1587
Andrew Trickef9de2a2013-05-25 02:42:55 +00001588 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001589 SelectCC.getOperand(0), // LHS
1590 SelectCC.getOperand(1), // RHS
1591 DAG.getConstant(-1, MVT::i32), // True
1592                               DAG.getConstant(0, MVT::i32), // False
1593 SelectCC.getOperand(4)); // CC
1596 }
Quentin Colombete2e05482013-07-30 00:27:16 +00001597
NAKAMURA Takumi8a046432013-10-28 04:07:38 +00001598  // insert_vector_elt (build_vector elt0, ..., eltN), NewElt, idx
1599  // => build_vector elt0, ..., NewElt, ..., eltN
Quentin Colombete2e05482013-07-30 00:27:16 +00001600 case ISD::INSERT_VECTOR_ELT: {
1601 SDValue InVec = N->getOperand(0);
1602 SDValue InVal = N->getOperand(1);
1603 SDValue EltNo = N->getOperand(2);
1604 SDLoc dl(N);
1605
1606 // If the inserted element is an UNDEF, just use the input vector.
1607 if (InVal.getOpcode() == ISD::UNDEF)
1608 return InVec;
1609
1610 EVT VT = InVec.getValueType();
1611
1612 // If we can't generate a legal BUILD_VECTOR, exit
1613 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1614 return SDValue();
1615
1616 // Check that we know which element is being inserted
1617 if (!isa<ConstantSDNode>(EltNo))
1618 return SDValue();
1619 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1620
1621 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1622 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1623 // vector elements.
1624 SmallVector<SDValue, 8> Ops;
1625 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1626 Ops.append(InVec.getNode()->op_begin(),
1627 InVec.getNode()->op_end());
1628 } else if (InVec.getOpcode() == ISD::UNDEF) {
1629 unsigned NElts = VT.getVectorNumElements();
1630 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1631 } else {
1632 return SDValue();
1633 }
1634
1635 // Insert the element
1636 if (Elt < Ops.size()) {
1637 // All the operands of BUILD_VECTOR must have the same type;
1638 // we enforce that here.
1639 EVT OpVT = Ops[0].getValueType();
1640 if (InVal.getValueType() != OpVT)
1641 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1642 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
1643 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
1644 Ops[Elt] = InVal;
1645 }
1646
1647 // Return the new vector
1648 return DAG.getNode(ISD::BUILD_VECTOR, dl,
Matt Arsenault7939acd2014-04-07 16:44:24 +00001649 VT, Ops.data(), Ops.size());
Quentin Colombete2e05482013-07-30 00:27:16 +00001650 }
1651
Tom Stellard365366f2013-01-23 02:09:06 +00001652  // Extract_vec (Build_vector) generated by custom lowering
1653  // also needs to be custom combined here.
1654 case ISD::EXTRACT_VECTOR_ELT: {
1655 SDValue Arg = N->getOperand(0);
1656 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1657 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1658 unsigned Element = Const->getZExtValue();
1659 return Arg->getOperand(Element);
1660 }
1661 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001662 if (Arg.getOpcode() == ISD::BITCAST &&
1663 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1664 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1665 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001666 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00001667 Arg->getOperand(0).getOperand(Element));
1668 }
1669 }
    // Don't fall through into the SELECT_CC combine with an
    // EXTRACT_VECTOR_ELT node.
    break;
Tom Stellard365366f2013-01-23 02:09:06 +00001670  }
Tom Stellarde06163a2013-02-07 14:02:35 +00001671
1672 case ISD::SELECT_CC: {
1673 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1674 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001675 //
1676 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1677 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001678 SDValue LHS = N->getOperand(0);
1679 if (LHS.getOpcode() != ISD::SELECT_CC) {
1680 return SDValue();
1681 }
1682
1683 SDValue RHS = N->getOperand(1);
1684 SDValue True = N->getOperand(2);
1685 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001686 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001687
1688 if (LHS.getOperand(2).getNode() != True.getNode() ||
1689 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00001690 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00001691 return SDValue();
1692 }
1693
Tom Stellard5e524892013-03-08 15:37:11 +00001694 switch (NCC) {
1695 default: return SDValue();
1696 case ISD::SETNE: return LHS;
1697 case ISD::SETEQ: {
1698 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1699 LHSCC = ISD::getSetCCInverse(LHSCC,
1700 LHS.getOperand(0).getValueType().isInteger());
Tom Stellardcd428182013-09-28 02:50:38 +00001701 if (DCI.isBeforeLegalizeOps() ||
1702 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1703 return DAG.getSelectCC(SDLoc(N),
1704 LHS.getOperand(0),
1705 LHS.getOperand(1),
1706 LHS.getOperand(2),
1707 LHS.getOperand(3),
1708 LHSCC);
1709 break;
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001710 }
Tom Stellard5e524892013-03-08 15:37:11 +00001711 }
Tom Stellardcd428182013-09-28 02:50:38 +00001712 return SDValue();
Tom Stellard5e524892013-03-08 15:37:11 +00001713 }
Tom Stellardfbab8272013-08-16 01:12:11 +00001714
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001715 case AMDGPUISD::EXPORT: {
1716 SDValue Arg = N->getOperand(1);
1717 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1718 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001719
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001720 SDValue NewArgs[8] = {
1721 N->getOperand(0), // Chain
1722 SDValue(),
1723 N->getOperand(2), // ArrayBase
1724 N->getOperand(3), // Type
1725 N->getOperand(4), // SWZ_X
1726 N->getOperand(5), // SWZ_Y
1727 N->getOperand(6), // SWZ_Z
1728 N->getOperand(7) // SWZ_W
1729 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00001730 SDLoc DL(N);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001731 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001732 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
Tom Stellarde06163a2013-02-07 14:02:35 +00001733 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001734 case AMDGPUISD::TEXTURE_FETCH: {
1735 SDValue Arg = N->getOperand(1);
1736 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1737 break;
1738
1739 SDValue NewArgs[19] = {
1740 N->getOperand(0),
1741 N->getOperand(1),
1742 N->getOperand(2),
1743 N->getOperand(3),
1744 N->getOperand(4),
1745 N->getOperand(5),
1746 N->getOperand(6),
1747 N->getOperand(7),
1748 N->getOperand(8),
1749 N->getOperand(9),
1750 N->getOperand(10),
1751 N->getOperand(11),
1752 N->getOperand(12),
1753 N->getOperand(13),
1754 N->getOperand(14),
1755 N->getOperand(15),
1756 N->getOperand(16),
1757 N->getOperand(17),
1758 N->getOperand(18),
1759 };
1760 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
1761 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
1762 NewArgs, 19);
1763 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001764 }
1765 return SDValue();
1766}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001767
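/// Try to fold the operand \p Src of \p ParentNode into the instruction's
/// source modifiers: the neg/abs flags, a constant-buffer selector, or an
/// inline/literal immediate (summarizing the cases handled below). On success
/// the relevant operand values are rewritten in place and true is returned.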
1768static bool
1769FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001770 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001771 const R600InstrInfo *TII =
1772 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
1773 if (!Src.isMachineOpcode())
1774 return false;
1775 switch (Src.getMachineOpcode()) {
1776 case AMDGPU::FNEG_R600:
1777 if (!Neg.getNode())
1778 return false;
1779 Src = Src.getOperand(0);
1780 Neg = DAG.getTargetConstant(1, MVT::i32);
1781 return true;
1782 case AMDGPU::FABS_R600:
1783 if (!Abs.getNode())
1784 return false;
1785 Src = Src.getOperand(0);
1786 Abs = DAG.getTargetConstant(1, MVT::i32);
1787 return true;
1788 case AMDGPU::CONST_COPY: {
1789 unsigned Opcode = ParentNode->getMachineOpcode();
1790 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
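    // getOperandIdx counts MachineInstr operands, which include the dst
    // operand; the machine SDNode operand list does not, so indices are
    // shifted down by one below whenever a dst operand exists.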
1791
1792 if (!Sel.getNode())
1793 return false;
1794
1795 SDValue CstOffset = Src.getOperand(0);
1796 if (ParentNode->getValueType(0).isVector())
1797 return false;
1798
1799    // Gather constant values.
1800 int SrcIndices[] = {
1801 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
1802 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
1803 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
1804 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1805 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1806 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1807 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
1808 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
1809 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
1810 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
1811 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
1812 };
1813 std::vector<unsigned> Consts;
1814 for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
1815 int OtherSrcIdx = SrcIndices[i];
1816 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
1817 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
1818 continue;
1819 if (HasDst) {
1820 OtherSrcIdx--;
1821 OtherSelIdx--;
1822 }
1823 if (RegisterSDNode *Reg =
1824 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
1825 if (Reg->getReg() == AMDGPU::ALU_CONST) {
1826          ConstantSDNode *Cst =
1827              cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
1828          Consts.push_back(Cst->getZExtValue());
1829 }
1830 }
1831 }
1832
1833    ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
1834    Consts.push_back(Cst->getZExtValue());
1835 if (!TII->fitsConstReadLimitations(Consts)) {
1836 return false;
1837 }
1838
1839 Sel = CstOffset;
1840 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
1841 return true;
1842 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001843 case AMDGPU::MOV_IMM_I32:
1844 case AMDGPU::MOV_IMM_F32: {
1845 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
1846 uint64_t ImmValue = 0;
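    // R600 exposes a few common constants as dedicated registers (ZERO, HALF,
    // ONE and ONE_INT below); any other value goes through the shared
    // ALU_LITERAL_X slot, with the raw bits carried in ImmValue.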
1847
1849 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
1850      ConstantFPSDNode *FPC = cast<ConstantFPSDNode>(Src.getOperand(0));
1851 float FloatValue = FPC->getValueAPF().convertToFloat();
1852 if (FloatValue == 0.0) {
1853 ImmReg = AMDGPU::ZERO;
1854 } else if (FloatValue == 0.5) {
1855 ImmReg = AMDGPU::HALF;
1856 } else if (FloatValue == 1.0) {
1857 ImmReg = AMDGPU::ONE;
1858 } else {
1859 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
1860 }
1861 } else {
1862      ConstantSDNode *C = cast<ConstantSDNode>(Src.getOperand(0));
1863 uint64_t Value = C->getZExtValue();
1864 if (Value == 0) {
1865 ImmReg = AMDGPU::ZERO;
1866 } else if (Value == 1) {
1867 ImmReg = AMDGPU::ONE_INT;
1868 } else {
1869 ImmValue = Value;
1870 }
1871 }
1872
1873 // Check that we aren't already using an immediate.
1874 // XXX: It's possible for an instruction to have more than one
1875 // immediate operand, but this is not supported yet.
1876 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
1877 if (!Imm.getNode())
1878 return false;
1879 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
1880 assert(C);
1881 if (C->getZExtValue())
1882 return false;
1883 Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
1884 }
1885 Src = DAG.getRegister(ImmReg, MVT::i32);
1886 return true;
1887 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001888 default:
1889 return false;
1890 }
1891}
1892
1894/// \brief Fold the instructions after selecting them
1895SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
1896 SelectionDAG &DAG) const {
1897 const R600InstrInfo *TII =
1898 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
1899 if (!Node->isMachineOpcode())
1900 return Node;
1901 unsigned Opcode = Node->getMachineOpcode();
1902 SDValue FakeOp;
1903
1904 std::vector<SDValue> Ops;
1905  for (SDNode::op_iterator I = Node->op_begin(), E = Node->op_end();
1906 I != E; ++I)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00001907 Ops.push_back(*I);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001908
1909 if (Opcode == AMDGPU::DOT_4) {
1910 int OperandIdx[] = {
1911 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1912 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1913 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1914 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
1915 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
1916 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
1917 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
1918 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00001919 };
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001920 int NegIdx[] = {
1921 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
1922 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
1923 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
1924 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
1925 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
1926 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
1927 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
1928 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
1929 };
1930 int AbsIdx[] = {
1931 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
1932 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
1933 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
1934 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
1935 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
1936 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
1937 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
1938 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
1939 };
1940 for (unsigned i = 0; i < 8; i++) {
1941 if (OperandIdx[i] < 0)
1942 return Node;
1943 SDValue &Src = Ops[OperandIdx[i] - 1];
1944 SDValue &Neg = Ops[NegIdx[i] - 1];
1945 SDValue &Abs = Ops[AbsIdx[i] - 1];
1946 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1947 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
1948 if (HasDst)
1949 SelIdx--;
1950 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001951 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
1952 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1953 }
1954 } else if (Opcode == AMDGPU::REG_SEQUENCE) {
1955 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
1956 SDValue &Src = Ops[i];
1957 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001958 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1959 }
Vincent Lejeune0167a312013-09-12 23:45:00 +00001960 } else if (Opcode == AMDGPU::CLAMP_R600) {
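    // CLAMP_R600 is folded away by setting the clamp output modifier bit on
    // the instruction that produces its operand, when that instruction
    // supports modifiers.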
1961 SDValue Src = Node->getOperand(0);
1962 if (!Src.isMachineOpcode() ||
1963 !TII->hasInstrModifiers(Src.getMachineOpcode()))
1964 return Node;
1965 int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
1966 AMDGPU::OpName::clamp);
1967 if (ClampIdx < 0)
1968 return Node;
1969 std::vector<SDValue> Ops;
1970 unsigned NumOp = Src.getNumOperands();
1971    for (unsigned i = 0; i < NumOp; ++i)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00001972 Ops.push_back(Src.getOperand(i));
Vincent Lejeune0167a312013-09-12 23:45:00 +00001973 Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
1974 return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
1975 Node->getVTList(), Ops);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001976 } else {
1977 if (!TII->hasInstrModifiers(Opcode))
1978 return Node;
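    // Generic single-slot ALU instruction: up to three sources, each with a
    // neg modifier and (except src2) an abs modifier, plus one shared
    // literal slot.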
1979 int OperandIdx[] = {
1980 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
1981 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
1982 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
1983 };
1984 int NegIdx[] = {
1985 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
1986 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
1987 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
1988 };
1989 int AbsIdx[] = {
1990 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
1991 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
1992 -1
1993 };
1994 for (unsigned i = 0; i < 3; i++) {
1995 if (OperandIdx[i] < 0)
1996 return Node;
1997 SDValue &Src = Ops[OperandIdx[i] - 1];
1998 SDValue &Neg = Ops[NegIdx[i] - 1];
1999 SDValue FakeAbs;
2000 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2001 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2002 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002003 int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
2004 if (HasDst) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002005 SelIdx--;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002006 ImmIdx--;
2007 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002008 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002009 SDValue &Imm = Ops[ImmIdx];
2010 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002011 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2012 }
2013 }
2014
2015 return Node;
2016}