//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Function.h"

using namespace llvm;

R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
  addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);

  computeRegisterProperties();

  // Set condition code actions
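  // A condition code marked Expand has no direct hardware encoding; the
  // legalizer rewrites it, typically by swapping the operands or inverting
  // the predicate into one of the comparison forms the hardware supports.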
  setCondCodeAction(ISD::SETO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::f32, Expand);

  setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::i32, Expand);

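  // FCOS/FSIN are custom lowered (see LowerTrig below), which range-reduces
  // the argument before it reaches the hardware COS/SIN units.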
  setOperationAction(ISD::FCOS, MVT::f32, Custom);
  setOperationAction(ISD::FSIN, MVT::f32, Custom);

  setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
  setOperationAction(ISD::SETCC, MVT::v2i32, Expand);

  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);

  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
  setOperationAction(ISD::SELECT, MVT::v2f32, Expand);
  setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
  setOperationAction(ISD::SELECT, MVT::v4f32, Expand);

  // Expand sign extension of scalar and vector types
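  // Without BFE (bit-field extract) the scalar forms are expanded to a shift
  // pair, e.g. sign-extending i8 inside an i32: (sra (shl x, 24), 24).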
  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);

  // Legalize loads and stores to the private address space.
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);

  // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
  // spaces, so it is custom lowered to handle those where it isn't.
  setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
  setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
  setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);

  setOperationAction(ISD::STORE, MVT::i8, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);
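  // R600 has no native byte/short store to global memory; LowerSTORE emits a
  // masked STORE_MSKOR sequence for truncating stores instead.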
  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
  setTruncStoreAction(MVT::i32, MVT::i16, Custom);

  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);

  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::FP_TO_SINT);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::SELECT_CC);
  setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);

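  // Hardware comparisons produce an all-ones mask when true, so booleans
  // (scalar and vector) are represented as 0 / -1 rather than 0 / 1.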
  setBooleanContents(ZeroOrNegativeOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
  setSchedulingPreference(Sched::Source);
}

MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // Replace LDS_*_RET instructions that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI->getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()))
        return BB;

      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
                       MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    unsigned EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

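    // Choose coordinate swizzles and coordinate-type bits for the sampler
    // kind: rect targets use unnormalized X/Y (CTX = CTY = 0), shadow
    // variants carry the comparison reference in W, and array targets treat
    // the layer index as unnormalized.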
    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO_INT)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // The instruction is left unmodified if it's not the last one of its type.
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz) ? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  MI->eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::FCOS:
  case ISD::FSIN: return LowerTrig(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: return LowerLOAD(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MFI->LiveOuts.push_back(Reg);
      return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
    }
    case AMDGPUIntrinsic::R600_store_swizzle: {
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, MVT::i32), // SWZ_X
        DAG.getConstant(1, MVT::i32), // SWZ_Y
        DAG.getConstant(2, MVT::i32), // SWZ_Z
        DAG.getConstant(3, MVT::i32)  // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(), Args);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch (IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      MRI.addLiveIn(Reg);
      return DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), Reg, VT);
    }

    case AMDGPUIntrinsic::R600_interp_input: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
      MachineSDNode *interp;
      if (ijb < 0) {
        const MachineFunction &MF = DAG.getMachineFunction();
        const R600InstrInfo *TII =
            static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
        interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
            MVT::v4f32, DAG.getTargetConstant(slot / 4, MVT::i32));
        return DAG.getTargetExtractSubreg(
            TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
            DL, MVT::f32, SDValue(interp, 0));
      }
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
      unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
      MRI.addLiveIn(RegisterI);
      MRI.addLiveIn(RegisterJ);
      SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
      SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);

      if (slot % 4 < 2)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4, MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4, MVT::i32),
            RegisterJNode, RegisterINode);
      return SDValue(interp, slot % 2);
    }
    case AMDGPUIntrinsic::R600_interp_xy:
    case AMDGPUIntrinsic::R600_interp_zw: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      MachineSDNode *interp;
      SDValue RegisterINode = Op.getOperand(2);
      SDValue RegisterJNode = Op.getOperand(3);

      if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
            RegisterJNode, RegisterINode);
      return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
          SDValue(interp, 0), SDValue(interp, 1));
    }
    case AMDGPUIntrinsic::R600_tex:
    case AMDGPUIntrinsic::R600_texc:
    case AMDGPUIntrinsic::R600_txl:
    case AMDGPUIntrinsic::R600_txlc:
    case AMDGPUIntrinsic::R600_txb:
    case AMDGPUIntrinsic::R600_txbc:
    case AMDGPUIntrinsic::R600_txf:
    case AMDGPUIntrinsic::R600_txq:
    case AMDGPUIntrinsic::R600_ddx:
    case AMDGPUIntrinsic::R600_ddy:
    case AMDGPUIntrinsic::R600_ldptr: {
      unsigned TextureOp;
      switch (IntrinsicID) {
      case AMDGPUIntrinsic::R600_tex:
        TextureOp = 0;
        break;
      case AMDGPUIntrinsic::R600_texc:
        TextureOp = 1;
        break;
      case AMDGPUIntrinsic::R600_txl:
        TextureOp = 2;
        break;
      case AMDGPUIntrinsic::R600_txlc:
        TextureOp = 3;
        break;
      case AMDGPUIntrinsic::R600_txb:
        TextureOp = 4;
        break;
      case AMDGPUIntrinsic::R600_txbc:
        TextureOp = 5;
        break;
      case AMDGPUIntrinsic::R600_txf:
        TextureOp = 6;
        break;
      case AMDGPUIntrinsic::R600_txq:
        TextureOp = 7;
        break;
      case AMDGPUIntrinsic::R600_ddx:
        TextureOp = 8;
        break;
      case AMDGPUIntrinsic::R600_ddy:
        TextureOp = 9;
        break;
      case AMDGPUIntrinsic::R600_ldptr:
        TextureOp = 10;
        break;
      default:
        llvm_unreachable("Unknown texture operation");
      }

      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
    }
    case AMDGPUIntrinsic::AMDGPU_dp4: {
      SDValue Args[8] = {
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(0, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(0, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(1, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(1, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(2, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(2, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(3, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(3, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
    }

    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
    return;
  case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
    return;
  case ISD::LOAD: {
    SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
    Results.push_back(SDValue(Node, 0));
    Results.push_back(SDValue(Node, 1));
    // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
    // function
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(Node, 1));
    return;
  }
  case ISD::STORE:
    SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
    Results.push_back(SDValue(Node, 0));
    return;
  }
}

SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
  // On hw >= R700, COS/SIN input must be between -1. and 1.
  // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
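  // 0.15915494309 is 1/(2*Pi): FRACT(x / (2*Pi) + 0.5) - 0.5 wraps the
  // argument into [-0.5, 0.5] turns, satisfying the [-1, 1] bound above.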
  EVT VT = Op.getValueType();
  SDValue Arg = Op.getOperand(0);
  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
      DAG.getNode(ISD::FADD, SDLoc(Op), VT,
          DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
              DAG.getConstantFP(0.15915494309, MVT::f32)),
          DAG.getConstantFP(0.5, MVT::f32)));
  unsigned TrigNode;
  switch (Op.getOpcode()) {
  case ISD::FCOS:
    TrigNode = AMDGPUISD::COS_HW;
    break;
  case ISD::FSIN:
    TrigNode = AMDGPUISD::SIN_HW;
    break;
  default:
    llvm_unreachable("Wrong trig opcode");
  }
  SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
      DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
          DAG.getConstantFP(-0.5, MVT::f32)));
  if (Gen >= AMDGPUSubtarget::R700)
    return TrigVal;
  // On R600 hw, COS/SIN input must be between -Pi and Pi.
  return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
      DAG.getConstantFP(3.14159265359, MVT::f32));
}

SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
  return DAG.getNode(
      ISD::SETCC,
      SDLoc(Op),
      MVT::i1,
      Op, DAG.getConstantFP(0.0f, MVT::f32),
      DAG.getCondCode(ISD::SETNE)
      );
}

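// Implicit kernel parameters (grid/group dimensions and the like) live at
// fixed dword offsets at the start of constant buffer 0, so the intrinsic
// becomes a constant-buffer load from that offset.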
SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   SDLoc DL,
                                                   unsigned DwordOffset) const {
  unsigned ByteOffset = DwordOffset * 4;
  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                           AMDGPUAS::CONSTANT_BUFFER_0);

  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)),
                     false, false, false, 0);
}

bool R600TargetLowering::isZero(SDValue Op) const {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
    return Cst->isNullValue();
  } else if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CstFP->isZero();
  } else {
    return false;
  }
}

SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
      ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

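    // CND* tests its operand against zero with ==, > or >=, so NE-style
    // predicates are handled by inverting the condition and swapping the
    // True/False values.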
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // Possible Min/Max pattern
  SDValue MinMax = LowerMinMax(Op, DAG);
  if (MinMax.getNode()) {
    return MinMax;
  }

  // If we make it this far, it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  } else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
                             LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}

/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
/// convert these pointers to a register index. Each register holds
/// 16 bytes (4 x 32-bit sub-registers), but we need to take into account the
/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
/// for indirect addressing.
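/// With \p StackWidth channels in use, each register covers StackWidth * 4
/// bytes of stack, so the register index is the byte address shifted right
/// by log2(StackWidth * 4) (the SRLPad values below).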
SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
                                               unsigned StackWidth,
                                               SelectionDAG &DAG) const {
  unsigned SRLPad;
  switch (StackWidth) {
  case 1:
    SRLPad = 2;
    break;
  case 2:
    SRLPad = 3;
    break;
  case 4:
    SRLPad = 4;
    break;
  default: llvm_unreachable("Invalid stack width");
  }

  return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
                     DAG.getConstant(SRLPad, MVT::i32));
}

void R600TargetLowering::getStackAddress(unsigned StackWidth,
                                         unsigned ElemIdx,
                                         unsigned &Channel,
                                         unsigned &PtrIncr) const {
  switch (StackWidth) {
  default:
  case 1:
    Channel = 0;
    if (ElemIdx > 0) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 2:
    Channel = ElemIdx % 2;
    if (ElemIdx == 2) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 4:
    Channel = ElemIdx;
    PtrIncr = 0;
    break;
  }
}

SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Ptr = Op.getOperand(2);

  SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
  if (Result.getNode()) {
    return Result;
  }

  if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
    if (StoreNode->isTruncatingStore()) {
      EVT VT = Value.getValueType();
      assert(VT.bitsLE(MVT::i32));
      EVT MemVT = StoreNode->getMemoryVT();
      SDValue MaskConstant;
      if (MemVT == MVT::i8) {
        MaskConstant = DAG.getConstant(0xFF, MVT::i32);
      } else {
        assert(MemVT == MVT::i16);
        MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
      }
      SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
                                      DAG.getConstant(2, MVT::i32));
      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
                                      DAG.getConstant(0x00000003, VT));
      SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
      SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
                                  DAG.getConstant(3, VT));
      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
      // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
      // vector instead.
      SDValue Src[4] = {
        ShiftedValue,
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(0, MVT::i32),
        Mask
      };
      SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src);
      SDValue Args[3] = { Chain, Input, DWordAddr };
      return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
                                     Op->getVTList(), Args, 3, MemVT,
                                     StoreNode->getMemOperand());
    } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
               Value.getValueType().bitsGE(MVT::i32)) {
      // Convert pointer from byte address to dword address.
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
                        DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
                                    Ptr, DAG.getConstant(2, MVT::i32)));

      if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
        llvm_unreachable("Truncated and indexed stores not supported yet");
      } else {
        Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
      }
      return Chain;
    }
  }

  EVT ValueVT = Value.getValueType();

  if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
  if (Ret.getNode()) {
    return Ret;
  }
  // Lowering for indirect addressing

  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
      getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (ValueVT.isVector()) {
    unsigned NumElemVT = ValueVT.getVectorNumElements();
    EVT ElemVT = ValueVT.getVectorElementType();
    SmallVector<SDValue, 4> Stores(NumElemVT);

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
           "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
                                 Value, DAG.getConstant(i, MVT::i32));

      Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
                              Chain, Elem, Ptr,
                              DAG.getTargetConstant(Channel, MVT::i32));
    }
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
  } else {
    if (ValueVT == MVT::i8) {
      Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
    }
    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
        DAG.getTargetConstant(0, MVT::i32)); // Channel
  }

  return Chain;
}

// return (512 + (kc_bank << 12))
static int
ConstantAddressBlock(unsigned AddressSpace) {
  switch (AddressSpace) {
  case AMDGPUAS::CONSTANT_BUFFER_0:
    return 512;
  case AMDGPUAS::CONSTANT_BUFFER_1:
    return 512 + 4096;
  case AMDGPUAS::CONSTANT_BUFFER_2:
    return 512 + 4096 * 2;
  case AMDGPUAS::CONSTANT_BUFFER_3:
    return 512 + 4096 * 3;
  case AMDGPUAS::CONSTANT_BUFFER_4:
    return 512 + 4096 * 4;
  case AMDGPUAS::CONSTANT_BUFFER_5:
    return 512 + 4096 * 5;
  case AMDGPUAS::CONSTANT_BUFFER_6:
    return 512 + 4096 * 6;
  case AMDGPUAS::CONSTANT_BUFFER_7:
    return 512 + 4096 * 7;
  case AMDGPUAS::CONSTANT_BUFFER_8:
    return 512 + 4096 * 8;
  case AMDGPUAS::CONSTANT_BUFFER_9:
    return 512 + 4096 * 9;
  case AMDGPUAS::CONSTANT_BUFFER_10:
    return 512 + 4096 * 10;
  case AMDGPUAS::CONSTANT_BUFFER_11:
    return 512 + 4096 * 11;
  case AMDGPUAS::CONSTANT_BUFFER_12:
    return 512 + 4096 * 12;
  case AMDGPUAS::CONSTANT_BUFFER_13:
    return 512 + 4096 * 13;
  case AMDGPUAS::CONSTANT_BUFFER_14:
    return 512 + 4096 * 14;
  case AMDGPUAS::CONSTANT_BUFFER_15:
    return 512 + 4096 * 15;
  default:
    return -1;
  }
}

SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
  if (Ret.getNode()) {
    SDValue Ops[2] = {
      Ret,
      Chain
    };
    return DAG.getMergeValues(Ops, 2, DL);
  }

  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      SplitVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
        isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT,
                           ArrayRef<SDValue>(Slots, NumElements));
    } else {
      // A non-constant ptr can't be folded, keep it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
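  // The expansion is an any-extending load followed by a shl/sra pair by the
  // bit-width difference, which recreates the sign bits in the wide type.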
Tom Stellard84021442013-07-23 01:48:24 +00001309 if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
1310 EVT MemVT = LoadNode->getMemoryVT();
1311 assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
1312 SDValue ShiftAmount =
1313 DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
1314 SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
1315 LoadNode->getPointerInfo(), MemVT,
1316 LoadNode->isVolatile(),
1317 LoadNode->isNonTemporal(),
1318 LoadNode->getAlignment());
1319 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
1320 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);
1321
1322 SDValue MergedValues[2] = { Sra, Chain };
1323 return DAG.getMergeValues(MergedValues, 2, DL);
1324 }
1325
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001326 if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1327 return SDValue();
1328 }
1329
1330 // Lowering for indirect addressing
1331 const MachineFunction &MF = DAG.getMachineFunction();
1332 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1333 getTargetMachine().getFrameLowering());
1334 unsigned StackWidth = TFL->getStackWidth(MF);
1335
1336 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1337
1338 if (VT.isVector()) {
1339 unsigned NumElemVT = VT.getVectorNumElements();
1340 EVT ElemVT = VT.getVectorElementType();
1341 SDValue Loads[4];
1342
1343 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1344 "vector width in load");
1345
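 // getStackAddress maps element i to a register channel plus a pointer
 // increment. Roughly: with StackWidth 4 every element lands in its own
 // channel of a single stack slot, while with StackWidth 1 each element
 // gets its own slot at channel 0.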
1346 for (unsigned i = 0; i < NumElemVT; ++i) {
1347 unsigned Channel, PtrIncr;
1348 getStackAddress(StackWidth, i, Channel, PtrIncr);
1349 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1350 DAG.getConstant(PtrIncr, MVT::i32));
1351 Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
1352 Chain, Ptr,
1353 DAG.getTargetConstant(Channel, MVT::i32),
1354 Op.getOperand(2));
1355 }
1356 for (unsigned i = NumElemVT; i < 4; ++i) {
1357 Loads[i] = DAG.getUNDEF(ElemVT);
1358 }
1359 EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
Craig Topper48d114b2014-04-26 18:35:24 +00001360 LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001361 } else {
1362 LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
1363 Chain, Ptr,
1364 DAG.getTargetConstant(0, MVT::i32), // Channel
1365 Op.getOperand(2));
1366 }
1367
Matt Arsenault7939acd2014-04-07 16:44:24 +00001368 SDValue Ops[2] = {
1369 LoweredLoad,
1370 Chain
1371 };
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001372
1373 return DAG.getMergeValues(Ops, 2, DL);
Tom Stellard365366f2013-01-23 02:09:06 +00001374}
Tom Stellard75aadc22012-12-11 21:25:42 +00001375
Tom Stellard75aadc22012-12-11 21:25:42 +00001376/// XXX Only kernel functions are supported, so we can assume for now that
1377/// every function is a kernel function, but in the future we should use
1378/// separate calling conventions for kernel and non-kernel functions.
1379SDValue R600TargetLowering::LowerFormalArguments(
1380 SDValue Chain,
1381 CallingConv::ID CallConv,
1382 bool isVarArg,
1383 const SmallVectorImpl<ISD::InputArg> &Ins,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001384 SDLoc DL, SelectionDAG &DAG,
Tom Stellard75aadc22012-12-11 21:25:42 +00001385 SmallVectorImpl<SDValue> &InVals) const {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001386 SmallVector<CCValAssign, 16> ArgLocs;
1387 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
1388 getTargetMachine(), ArgLocs, *DAG.getContext());
Vincent Lejeunef143af32013-11-11 22:10:24 +00001389 MachineFunction &MF = DAG.getMachineFunction();
1390 unsigned ShaderType = MF.getInfo<R600MachineFunctionInfo>()->ShaderType;
Tom Stellardacfeebf2013-07-23 01:48:05 +00001391
Tom Stellardaf775432013-10-23 00:44:32 +00001392 SmallVector<ISD::InputArg, 8> LocalIns;
1393
Matt Arsenault209a7b92014-04-18 07:40:20 +00001394 getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);
Tom Stellardaf775432013-10-23 00:44:32 +00001395
1396 AnalyzeFormalArguments(CCInfo, LocalIns);
Tom Stellardacfeebf2013-07-23 01:48:05 +00001397
Tom Stellard1e803092013-07-23 01:48:18 +00001398 for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001399 CCValAssign &VA = ArgLocs[i];
Tom Stellardaf775432013-10-23 00:44:32 +00001400 EVT VT = Ins[i].VT;
1401 EVT MemVT = LocalIns[i].VT;
Tom Stellard78e01292013-07-23 01:47:58 +00001402
Vincent Lejeunef143af32013-11-11 22:10:24 +00001403 if (ShaderType != ShaderType::COMPUTE) {
1404 unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
1405 SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
1406 InVals.push_back(Register);
1407 continue;
1408 }
1409
Tom Stellard75aadc22012-12-11 21:25:42 +00001410 PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +00001411 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellardacfeebf2013-07-23 01:48:05 +00001412
Matt Arsenaultfae02982014-03-17 18:58:11 +00001413 // i64 isn't a legal type, so the register type used ends up as i32, which
1414 // isn't expected here. It attempts to create this sextload, but it ends up
1415 // being invalid. Somehow this seems to work with i64 arguments, but breaks
1416 // for <1 x i64>.
1417
Tom Stellardacfeebf2013-07-23 01:48:05 +00001418 // The first 36 bytes of the input buffer contain information about
 1419 // thread group and global sizes.
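 // Hence the 36 + VA.getLocMemOffset() below: the first explicit kernel
 // argument is loaded from byte offset 36.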
Matt Arsenaulte1f030c2014-04-11 20:59:54 +00001420
1421 // FIXME: This should really check the extload type, but the handling of
 1422 // extload vector parameters seems to be broken.
1423 //ISD::LoadExtType Ext = Ins[i].Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
1424 ISD::LoadExtType Ext = ISD::SEXTLOAD;
1425 SDValue Arg = DAG.getExtLoad(Ext, DL, VT, Chain,
Tom Stellardaf775432013-10-23 00:44:32 +00001426 DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
1427 MachinePointerInfo(UndefValue::get(PtrTy)),
1428 MemVT, false, false, 4);
Matt Arsenault209a7b92014-04-18 07:40:20 +00001429
1430 // 4 is the preferred alignment for the CONSTANT memory space.
Tom Stellard75aadc22012-12-11 21:25:42 +00001431 InVals.push_back(Arg);
Tom Stellard75aadc22012-12-11 21:25:42 +00001432 }
1433 return Chain;
1434}
1435
Matt Arsenault758659232013-05-18 00:21:46 +00001436EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001437 if (!VT.isVector())
1438 return MVT::i32;
Tom Stellard75aadc22012-12-11 21:25:42 +00001439 return VT.changeVectorElementTypeToInteger();
1440}
1441
Matt Arsenault209a7b92014-04-18 07:40:20 +00001442static SDValue CompactSwizzlableVector(
1443 SelectionDAG &DAG, SDValue VectorEntry,
1444 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001445 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1446 assert(RemapSwizzle.empty());
1447 SDValue NewBldVec[4] = {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001448 VectorEntry.getOperand(0),
1449 VectorEntry.getOperand(1),
1450 VectorEntry.getOperand(2),
1451 VectorEntry.getOperand(3)
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001452 };
1453
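 // An illustration (hypothetical input): build_vector(x, x, 0.0f, 1.0f)
 // compacts to build_vector(x, undef, undef, undef) with
 // RemapSwizzle = {1 -> 0, 2 -> 4 (SEL_0), 3 -> 5 (SEL_1)}.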
1454 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeunefa58a5f2013-10-13 17:56:10 +00001455 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
 1456 // We mask the write here to teach later passes that the ith element of
 1457 // this vector is undef. Thus we can use it to reduce 128-bit register
 1458 // usage, break false dependencies, and additionally make the assembly easier to read.
1459 RemapSwizzle[i] = 7; // SEL_MASK_WRITE
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001460 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1461 if (C->isZero()) {
1462 RemapSwizzle[i] = 4; // SEL_0
1463 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1464 } else if (C->isExactlyValue(1.0)) {
1465 RemapSwizzle[i] = 5; // SEL_1
1466 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1467 }
1468 }
1469
1470 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1471 continue;
1472 for (unsigned j = 0; j < i; j++) {
1473 if (NewBldVec[i] == NewBldVec[j]) {
1474 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1475 RemapSwizzle[i] = j;
1476 break;
1477 }
1478 }
1479 }
1480
1481 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001482 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001483}
1484
Benjamin Kramer193960c2013-06-11 13:32:25 +00001485static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1486 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001487 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1488 assert(RemapSwizzle.empty());
1489 SDValue NewBldVec[4] = {
1490 VectorEntry.getOperand(0),
1491 VectorEntry.getOperand(1),
1492 VectorEntry.getOperand(2),
1493 VectorEntry.getOperand(3)
1494 };
1495 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001496 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001497 RemapSwizzle[i] = i;
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001498 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
 1499 unsigned Idx = cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
 1500 ->getZExtValue();
1501 if (i == Idx)
1502 isUnmovable[Idx] = true;
1503 }
1504 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001505
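 // At this point every lane that already extracts its own index is pinned.
 // The loop below then performs at most one swap, e.g. a lane 0 that
 // extracts element 2 moves to lane 2 when lane 2 is not pinned, so the
 // reordering stays expressible as a swizzle.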
1506 for (unsigned i = 0; i < 4; i++) {
1507 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
 1508 unsigned Idx = cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
 1509 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001510 if (isUnmovable[Idx])
1511 continue;
1512 // Swap i and Idx
1513 std::swap(NewBldVec[Idx], NewBldVec[i]);
1514 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1515 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001516 }
1517 }
1518
1519 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
Craig Topper48d114b2014-04-26 18:35:24 +00001520 VectorEntry.getValueType(), NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001521}
1522
1523
1524SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1525SDValue Swz[4], SelectionDAG &DAG) const {
1526 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1527 // Old -> New swizzle values
1528 DenseMap<unsigned, unsigned> SwizzleRemap;
1529
1530 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1531 for (unsigned i = 0; i < 4; i++) {
 1532 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
1533 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1534 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1535 }
1536
1537 SwizzleRemap.clear();
1538 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1539 for (unsigned i = 0; i < 4; i++) {
 1540 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
1541 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1542 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1543 }
1544
1545 return BuildVector;
1546}
1547
1548
Tom Stellard75aadc22012-12-11 21:25:42 +00001549//===----------------------------------------------------------------------===//
1550// Custom DAG Optimizations
1551//===----------------------------------------------------------------------===//
1552
1553SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1554 DAGCombinerInfo &DCI) const {
1555 SelectionDAG &DAG = DCI.DAG;
1556
1557 switch (N->getOpcode()) {
Tom Stellard50122a52014-04-07 19:45:41 +00001558 default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00001559 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1560 case ISD::FP_ROUND: {
1561 SDValue Arg = N->getOperand(0);
1562 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001563 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001564 Arg.getOperand(0));
1565 }
1566 break;
1567 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001568
1569 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1570 // (i32 select_cc f32, f32, -1, 0 cc)
1571 //
1572 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1573 // this to one of the SET*_DX10 instructions.
1574 case ISD::FP_TO_SINT: {
1575 SDValue FNeg = N->getOperand(0);
1576 if (FNeg.getOpcode() != ISD::FNEG) {
1577 return SDValue();
1578 }
1579 SDValue SelectCC = FNeg.getOperand(0);
1580 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1581 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1582 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1583 !isHWTrueValue(SelectCC.getOperand(2)) ||
1584 !isHWFalseValue(SelectCC.getOperand(3))) {
1585 return SDValue();
1586 }
1587
Andrew Trickef9de2a2013-05-25 02:42:55 +00001588 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001589 SelectCC.getOperand(0), // LHS
1590 SelectCC.getOperand(1), // RHS
1591 DAG.getConstant(-1, MVT::i32), // True
 1592 DAG.getConstant(0, MVT::i32), // False
1593 SelectCC.getOperand(4)); // CC
1594
1595 break;
1596 }
Quentin Colombete2e05482013-07-30 00:27:16 +00001597
NAKAMURA Takumi8a046432013-10-28 04:07:38 +00001598 // insert_vector_elt (build_vector elt0, ... , eltN), NewElt, idx
 1599 // => build_vector elt0, ... , NewElt, ... , eltN
Quentin Colombete2e05482013-07-30 00:27:16 +00001600 case ISD::INSERT_VECTOR_ELT: {
1601 SDValue InVec = N->getOperand(0);
1602 SDValue InVal = N->getOperand(1);
1603 SDValue EltNo = N->getOperand(2);
1604 SDLoc dl(N);
1605
1606 // If the inserted element is an UNDEF, just use the input vector.
1607 if (InVal.getOpcode() == ISD::UNDEF)
1608 return InVec;
1609
1610 EVT VT = InVec.getValueType();
1611
1612 // If we can't generate a legal BUILD_VECTOR, exit
1613 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1614 return SDValue();
1615
1616 // Check that we know which element is being inserted
1617 if (!isa<ConstantSDNode>(EltNo))
1618 return SDValue();
1619 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1620
1621 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1622 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1623 // vector elements.
1624 SmallVector<SDValue, 8> Ops;
1625 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1626 Ops.append(InVec.getNode()->op_begin(),
1627 InVec.getNode()->op_end());
1628 } else if (InVec.getOpcode() == ISD::UNDEF) {
1629 unsigned NElts = VT.getVectorNumElements();
1630 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1631 } else {
1632 return SDValue();
1633 }
1634
1635 // Insert the element
1636 if (Elt < Ops.size()) {
1637 // All the operands of BUILD_VECTOR must have the same type;
1638 // we enforce that here.
1639 EVT OpVT = Ops[0].getValueType();
1640 if (InVal.getValueType() != OpVT)
1641 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1642 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
1643 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
1644 Ops[Elt] = InVal;
1645 }
1646
1647 // Return the new vector
Craig Topper48d114b2014-04-26 18:35:24 +00001648 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
Quentin Colombete2e05482013-07-30 00:27:16 +00001649 }
1650
Tom Stellard365366f2013-01-23 02:09:06 +00001651 // Extract_vec (Build_vector) generated by custom lowering
 1652 // also needs custom combining here.
1653 case ISD::EXTRACT_VECTOR_ELT: {
1654 SDValue Arg = N->getOperand(0);
1655 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1656 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1657 unsigned Element = Const->getZExtValue();
1658 return Arg->getOperand(Element);
1659 }
1660 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001661 if (Arg.getOpcode() == ISD::BITCAST &&
1662 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1663 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1664 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001665 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00001666 Arg->getOperand(0).getOperand(Element));
1667 }
1668 }
 break;
Tom Stellard365366f2013-01-23 02:09:06 +00001669 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001670
1671 case ISD::SELECT_CC: {
1672 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1673 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001674 //
1675 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1676 // selectcc x, y, a, b, cc
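 // (Reasoning sketch: the inner selectcc yields either a or b, so with
 // setne the outer select reproduces the inner one, while with seteq it
 // picks the opposite value, i.e. the inverted condition, assuming a != b.)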
Tom Stellarde06163a2013-02-07 14:02:35 +00001677 SDValue LHS = N->getOperand(0);
1678 if (LHS.getOpcode() != ISD::SELECT_CC) {
1679 return SDValue();
1680 }
1681
1682 SDValue RHS = N->getOperand(1);
1683 SDValue True = N->getOperand(2);
1684 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001685 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001686
1687 if (LHS.getOperand(2).getNode() != True.getNode() ||
1688 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00001689 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00001690 return SDValue();
1691 }
1692
Tom Stellard5e524892013-03-08 15:37:11 +00001693 switch (NCC) {
1694 default: return SDValue();
1695 case ISD::SETNE: return LHS;
1696 case ISD::SETEQ: {
1697 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1698 LHSCC = ISD::getSetCCInverse(LHSCC,
1699 LHS.getOperand(0).getValueType().isInteger());
Tom Stellardcd428182013-09-28 02:50:38 +00001700 if (DCI.isBeforeLegalizeOps() ||
1701 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1702 return DAG.getSelectCC(SDLoc(N),
1703 LHS.getOperand(0),
1704 LHS.getOperand(1),
1705 LHS.getOperand(2),
1706 LHS.getOperand(3),
1707 LHSCC);
1708 break;
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001709 }
Tom Stellard5e524892013-03-08 15:37:11 +00001710 }
Tom Stellardcd428182013-09-28 02:50:38 +00001711 return SDValue();
Tom Stellard5e524892013-03-08 15:37:11 +00001712 }
Tom Stellardfbab8272013-08-16 01:12:11 +00001713
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001714 case AMDGPUISD::EXPORT: {
1715 SDValue Arg = N->getOperand(1);
1716 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1717 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001718
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001719 SDValue NewArgs[8] = {
1720 N->getOperand(0), // Chain
1721 SDValue(),
1722 N->getOperand(2), // ArrayBase
1723 N->getOperand(3), // Type
1724 N->getOperand(4), // SWZ_X
1725 N->getOperand(5), // SWZ_Y
1726 N->getOperand(6), // SWZ_Z
1727 N->getOperand(7) // SWZ_W
1728 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00001729 SDLoc DL(N);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001730 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
Craig Topper48d114b2014-04-26 18:35:24 +00001731 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
Tom Stellarde06163a2013-02-07 14:02:35 +00001732 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001733 case AMDGPUISD::TEXTURE_FETCH: {
1734 SDValue Arg = N->getOperand(1);
1735 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1736 break;
1737
1738 SDValue NewArgs[19] = {
1739 N->getOperand(0),
1740 N->getOperand(1),
1741 N->getOperand(2),
1742 N->getOperand(3),
1743 N->getOperand(4),
1744 N->getOperand(5),
1745 N->getOperand(6),
1746 N->getOperand(7),
1747 N->getOperand(8),
1748 N->getOperand(9),
1749 N->getOperand(10),
1750 N->getOperand(11),
1751 N->getOperand(12),
1752 N->getOperand(13),
1753 N->getOperand(14),
1754 N->getOperand(15),
1755 N->getOperand(16),
1756 N->getOperand(17),
1757 N->getOperand(18),
1758 };
1759 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
1760 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
Craig Topper48d114b2014-04-26 18:35:24 +00001761 NewArgs);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001762 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001763 }
1764 return SDValue();
1765}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001766
1767static bool
1768FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001769 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001770 const R600InstrInfo *TII =
1771 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
1772 if (!Src.isMachineOpcode())
1773 return false;
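 // The cases below absorb modifier-like machine nodes into the parent's
 // operand fields. E.g. when Src is (FNEG_R600 y), Src is rewritten to y
 // and the parent's corresponding neg flag operand is set to 1.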
1774 switch (Src.getMachineOpcode()) {
1775 case AMDGPU::FNEG_R600:
1776 if (!Neg.getNode())
1777 return false;
1778 Src = Src.getOperand(0);
1779 Neg = DAG.getTargetConstant(1, MVT::i32);
1780 return true;
1781 case AMDGPU::FABS_R600:
1782 if (!Abs.getNode())
1783 return false;
1784 Src = Src.getOperand(0);
1785 Abs = DAG.getTargetConstant(1, MVT::i32);
1786 return true;
1787 case AMDGPU::CONST_COPY: {
1788 unsigned Opcode = ParentNode->getMachineOpcode();
1789 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1790
1791 if (!Sel.getNode())
1792 return false;
1793
1794 SDValue CstOffset = Src.getOperand(0);
1795 if (ParentNode->getValueType(0).isVector())
1796 return false;
1797
 1798 // Gather constant values
1799 int SrcIndices[] = {
1800 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
1801 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
1802 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
1803 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1804 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1805 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1806 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
1807 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
1808 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
1809 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
1810 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
1811 };
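 // (The loop below gathers the kcache selects already used by the parent
 // node so that fitsConstReadLimitations can check, before folding one
 // more constant, that the hardware's constant-read limits still hold.)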
1812 std::vector<unsigned> Consts;
1813 for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
1814 int OtherSrcIdx = SrcIndices[i];
1815 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
1816 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
1817 continue;
1818 if (HasDst) {
1819 OtherSrcIdx--;
1820 OtherSelIdx--;
1821 }
1822 if (RegisterSDNode *Reg =
1823 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
1824 if (Reg->getReg() == AMDGPU::ALU_CONST) {
 1825 ConstantSDNode *Cst = cast<ConstantSDNode>(
 1826 ParentNode->getOperand(OtherSelIdx));
1827 Consts.push_back(Cst->getZExtValue());
1828 }
1829 }
1830 }
1831
 1832 ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
1833 Consts.push_back(Cst->getZExtValue());
1834 if (!TII->fitsConstReadLimitations(Consts)) {
1835 return false;
1836 }
1837
1838 Sel = CstOffset;
1839 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
1840 return true;
1841 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001842 case AMDGPU::MOV_IMM_I32:
1843 case AMDGPU::MOV_IMM_F32: {
1844 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
1845 uint64_t ImmValue = 0;
 1846
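 // A note on the mapping below: 0.0, 0.5, and 1.0 use the dedicated
 // inline-constant registers ZERO, HALF, and ONE, and integers 0 and 1
 // use ZERO and ONE_INT; any other value is carried via ALU_LITERAL_X
 // with its raw bits as the literal.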
1848 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
 1849 ConstantFPSDNode *FPC = cast<ConstantFPSDNode>(Src.getOperand(0));
1850 float FloatValue = FPC->getValueAPF().convertToFloat();
1851 if (FloatValue == 0.0) {
1852 ImmReg = AMDGPU::ZERO;
1853 } else if (FloatValue == 0.5) {
1854 ImmReg = AMDGPU::HALF;
1855 } else if (FloatValue == 1.0) {
1856 ImmReg = AMDGPU::ONE;
1857 } else {
1858 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
1859 }
1860 } else {
 1861 ConstantSDNode *C = cast<ConstantSDNode>(Src.getOperand(0));
1862 uint64_t Value = C->getZExtValue();
1863 if (Value == 0) {
1864 ImmReg = AMDGPU::ZERO;
1865 } else if (Value == 1) {
1866 ImmReg = AMDGPU::ONE_INT;
1867 } else {
1868 ImmValue = Value;
1869 }
1870 }
1871
1872 // Check that we aren't already using an immediate.
1873 // XXX: It's possible for an instruction to have more than one
1874 // immediate operand, but this is not supported yet.
1875 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
1876 if (!Imm.getNode())
1877 return false;
1878 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
1879 assert(C);
1880 if (C->getZExtValue())
1881 return false;
1882 Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
1883 }
1884 Src = DAG.getRegister(ImmReg, MVT::i32);
1885 return true;
1886 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001887 default:
1888 return false;
1889 }
1890}
1891
1892
1893/// \brief Fold the instructions after selecting them
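/// (For example, a DOT_4 whose src0_X comes from an FNEG_R600 node is
/// rewritten in place so the negation is folded into its src0_neg_X
/// operand.)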
1894SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
1895 SelectionDAG &DAG) const {
1896 const R600InstrInfo *TII =
1897 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
1898 if (!Node->isMachineOpcode())
1899 return Node;
1900 unsigned Opcode = Node->getMachineOpcode();
1901 SDValue FakeOp;
1902
1903 std::vector<SDValue> Ops;
 1904 for (SDNode::op_iterator I = Node->op_begin(), E = Node->op_end();
1905 I != E; ++I)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00001906 Ops.push_back(*I);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001907
1908 if (Opcode == AMDGPU::DOT_4) {
1909 int OperandIdx[] = {
1910 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1911 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1912 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1913 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
1914 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
1915 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
1916 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
1917 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00001918 };
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001919 int NegIdx[] = {
1920 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
1921 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
1922 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
1923 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
1924 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
1925 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
1926 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
1927 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
1928 };
1929 int AbsIdx[] = {
1930 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
1931 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
1932 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
1933 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
1934 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
1935 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
1936 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
1937 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
1938 };
1939 for (unsigned i = 0; i < 8; i++) {
1940 if (OperandIdx[i] < 0)
1941 return Node;
1942 SDValue &Src = Ops[OperandIdx[i] - 1];
1943 SDValue &Neg = Ops[NegIdx[i] - 1];
1944 SDValue &Abs = Ops[AbsIdx[i] - 1];
1945 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1946 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
1947 if (HasDst)
1948 SelIdx--;
1949 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001950 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
1951 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1952 }
1953 } else if (Opcode == AMDGPU::REG_SEQUENCE) {
1954 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
1955 SDValue &Src = Ops[i];
1956 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001957 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1958 }
Vincent Lejeune0167a312013-09-12 23:45:00 +00001959 } else if (Opcode == AMDGPU::CLAMP_R600) {
1960 SDValue Src = Node->getOperand(0);
1961 if (!Src.isMachineOpcode() ||
1962 !TII->hasInstrModifiers(Src.getMachineOpcode()))
1963 return Node;
1964 int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
1965 AMDGPU::OpName::clamp);
1966 if (ClampIdx < 0)
1967 return Node;
1968 std::vector<SDValue> Ops;
1969 unsigned NumOp = Src.getNumOperands();
 1970 for (unsigned i = 0; i < NumOp; ++i)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00001971 Ops.push_back(Src.getOperand(i));
Vincent Lejeune0167a312013-09-12 23:45:00 +00001972 Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
1973 return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
1974 Node->getVTList(), Ops);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001975 } else {
1976 if (!TII->hasInstrModifiers(Opcode))
1977 return Node;
1978 int OperandIdx[] = {
1979 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
1980 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
1981 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
1982 };
1983 int NegIdx[] = {
1984 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
1985 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
1986 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
1987 };
1988 int AbsIdx[] = {
1989 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
1990 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
1991 -1
1992 };
1993 for (unsigned i = 0; i < 3; i++) {
1994 if (OperandIdx[i] < 0)
1995 return Node;
1996 SDValue &Src = Ops[OperandIdx[i] - 1];
1997 SDValue &Neg = Ops[NegIdx[i] - 1];
1998 SDValue FakeAbs;
1999 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2000 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2001 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002002 int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
2003 if (HasDst) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002004 SelIdx--;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002005 ImmIdx--;
2006 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002007 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002008 SDValue &Imm = Ops[ImmIdx];
2009 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002010 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2011 }
2012 }
2013
2014 return Node;
2015}