blob: 3c2e3888e08569f39d6363db972c3553f0e7761a [file] [log] [blame]
//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
16#include "R600Defines.h"
17#include "R600InstrInfo.h"
18#include "R600MachineFunctionInfo.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000019#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000020#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000021#include "llvm/CodeGen/MachineInstrBuilder.h"
22#include "llvm/CodeGen/MachineRegisterInfo.h"
23#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000024#include "llvm/IR/Argument.h"
25#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000026
27using namespace llvm;
28
29R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000030 AMDGPUTargetLowering(TM),
31 Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000032 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
33 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
34 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
35 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000036 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
37 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
38
Tom Stellard75aadc22012-12-11 21:25:42 +000039 computeRegisterProperties();
40
Tom Stellard0351ea22013-09-28 02:50:50 +000041 // Set condition code actions
42 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
43 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000044 setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000045 setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000046 setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
47 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000048 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
49 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
50 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
51 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000052 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
53 setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
54
55 setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
56 setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
57 setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
58 setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
59
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000060 setOperationAction(ISD::FCOS, MVT::f32, Custom);
61 setOperationAction(ISD::FSIN, MVT::f32, Custom);
62
Tom Stellard75aadc22012-12-11 21:25:42 +000063 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000064 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000065
Tom Stellard492ebea2013-03-08 15:37:07 +000066 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
67 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000068
69 setOperationAction(ISD::FSUB, MVT::f32, Expand);
70
71 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
72 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
73 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000074
Tom Stellard75aadc22012-12-11 21:25:42 +000075 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
76 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
77
Tom Stellarde8f9f282013-03-08 15:37:05 +000078 setOperationAction(ISD::SETCC, MVT::i32, Expand);
79 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000080 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
81
Tom Stellard53f2f902013-09-05 18:38:03 +000082 setOperationAction(ISD::SELECT, MVT::i32, Expand);
83 setOperationAction(ISD::SELECT, MVT::f32, Expand);
84 setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
85 setOperationAction(ISD::SELECT, MVT::v2f32, Expand);
86 setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
87 setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000088
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000089 // Legalize loads and stores to the private address space.
90 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +000091 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000092 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellard1e803092013-07-23 01:48:18 +000093 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
94 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
95 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
96 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000097 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000098 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +000099 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000100 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
Tom Stellardd3ee8c12013-08-16 01:12:06 +0000101 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
102 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000103
Tom Stellard365366f2013-01-23 02:09:06 +0000104 setOperationAction(ISD::LOAD, MVT::i32, Custom);
105 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000106 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
107
Tom Stellard75aadc22012-12-11 21:25:42 +0000108 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +0000109 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +0000110 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +0000111 setTargetDAGCombine(ISD::SELECT_CC);
Quentin Colombete2e05482013-07-30 00:27:16 +0000112 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000113
Michel Danzer49812b52013-07-10 16:37:07 +0000114 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
115
Tom Stellardb852af52013-03-08 15:37:03 +0000116 setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard87047f62013-04-24 23:56:18 +0000117 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
Tom Stellardfc455472013-08-12 22:33:21 +0000118 setSchedulingPreference(Sched::Source);
Tom Stellard75aadc22012-12-11 21:25:42 +0000119}
120
121MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
122 MachineInstr * MI, MachineBasicBlock * BB) const {
123 MachineFunction * MF = BB->getParent();
124 MachineRegisterInfo &MRI = MF->getRegInfo();
125 MachineBasicBlock::iterator I = *MI;
Bill Wendling37e9adb2013-06-07 20:28:55 +0000126 const R600InstrInfo *TII =
127 static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());
Tom Stellard75aadc22012-12-11 21:25:42 +0000128
129 switch (MI->getOpcode()) {
Tom Stellardc6f4a292013-08-26 15:05:59 +0000130 default:
Tom Stellard13c68ef2013-09-05 18:38:09 +0000131 if (TII->isLDSInstr(MI->getOpcode()) &&
132 TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst) != -1) {
133 int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
134 assert(DstIdx != -1);
135 MachineInstrBuilder NewMI;
136 if (!MRI.use_empty(MI->getOperand(DstIdx).getReg())) {
137 NewMI = BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()),
138 AMDGPU::OQAP);
139 TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV,
140 MI->getOperand(0).getReg(),
141 AMDGPU::OQAP);
142 } else {
143 NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
144 TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
145 }
Tom Stellardc6f4a292013-08-26 15:05:59 +0000146 for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
147 NewMI.addOperand(MI->getOperand(i));
148 }
Tom Stellardc6f4a292013-08-26 15:05:59 +0000149 } else {
150 return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
151 }
152 break;
Tom Stellard75aadc22012-12-11 21:25:42 +0000153 case AMDGPU::CLAMP_R600: {
154 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
155 AMDGPU::MOV,
156 MI->getOperand(0).getReg(),
157 MI->getOperand(1).getReg());
158 TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
159 break;
160 }
161
162 case AMDGPU::FABS_R600: {
163 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
164 AMDGPU::MOV,
165 MI->getOperand(0).getReg(),
166 MI->getOperand(1).getReg());
167 TII->addFlag(NewMI, 0, MO_FLAG_ABS);
168 break;
169 }
170
171 case AMDGPU::FNEG_R600: {
172 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
173 AMDGPU::MOV,
174 MI->getOperand(0).getReg(),
175 MI->getOperand(1).getReg());
176 TII->addFlag(NewMI, 0, MO_FLAG_NEG);
177 break;
178 }
179
Tom Stellard75aadc22012-12-11 21:25:42 +0000180 case AMDGPU::MASK_WRITE: {
181 unsigned maskedRegister = MI->getOperand(0).getReg();
182 assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
183 MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
184 TII->addFlag(defInstr, 0, MO_FLAG_MASK);
185 break;
186 }
187
188 case AMDGPU::MOV_IMM_F32:
189 TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
190 MI->getOperand(1).getFPImm()->getValueAPF()
191 .bitcastToAPInt().getZExtValue());
192 break;
193 case AMDGPU::MOV_IMM_I32:
194 TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
195 MI->getOperand(1).getImm());
196 break;
Vincent Lejeune0b72f102013-03-05 15:04:55 +0000197 case AMDGPU::CONST_COPY: {
198 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
199 MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
Tom Stellard02661d92013-06-25 21:22:18 +0000200 TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
Vincent Lejeune0b72f102013-03-05 15:04:55 +0000201 MI->getOperand(1).getImm());
202 break;
203 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000204
205 case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
Tom Stellard0344cdf2013-08-01 15:23:42 +0000206 case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
Tom Stellard75aadc22012-12-11 21:25:42 +0000207 case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
208 unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
209
210 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
211 .addOperand(MI->getOperand(0))
212 .addOperand(MI->getOperand(1))
213 .addImm(EOP); // Set End of program bit
214 break;
215 }
216
Tom Stellard75aadc22012-12-11 21:25:42 +0000217 case AMDGPU::TXD: {
218 unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
219 unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000220 MachineOperand &RID = MI->getOperand(4);
221 MachineOperand &SID = MI->getOperand(5);
222 unsigned TextureId = MI->getOperand(6).getImm();
223 unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
224 unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
Tom Stellard75aadc22012-12-11 21:25:42 +0000225
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000226 switch (TextureId) {
227 case 5: // Rect
228 CTX = CTY = 0;
229 break;
230 case 6: // Shadow1D
231 SrcW = SrcZ;
232 break;
233 case 7: // Shadow2D
234 SrcW = SrcZ;
235 break;
236 case 8: // ShadowRect
237 CTX = CTY = 0;
238 SrcW = SrcZ;
239 break;
240 case 9: // 1DArray
241 SrcZ = SrcY;
242 CTZ = 0;
243 break;
244 case 10: // 2DArray
245 CTZ = 0;
246 break;
247 case 11: // Shadow1DArray
248 SrcZ = SrcY;
249 CTZ = 0;
250 break;
251 case 12: // Shadow2DArray
252 CTZ = 0;
253 break;
254 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000255 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
256 .addOperand(MI->getOperand(3))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000257 .addImm(SrcX)
258 .addImm(SrcY)
259 .addImm(SrcZ)
260 .addImm(SrcW)
261 .addImm(0)
262 .addImm(0)
263 .addImm(0)
264 .addImm(0)
265 .addImm(1)
266 .addImm(2)
267 .addImm(3)
268 .addOperand(RID)
269 .addOperand(SID)
270 .addImm(CTX)
271 .addImm(CTY)
272 .addImm(CTZ)
273 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000274 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
275 .addOperand(MI->getOperand(2))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000276 .addImm(SrcX)
277 .addImm(SrcY)
278 .addImm(SrcZ)
279 .addImm(SrcW)
280 .addImm(0)
281 .addImm(0)
282 .addImm(0)
283 .addImm(0)
284 .addImm(1)
285 .addImm(2)
286 .addImm(3)
287 .addOperand(RID)
288 .addOperand(SID)
289 .addImm(CTX)
290 .addImm(CTY)
291 .addImm(CTZ)
292 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000293 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
294 .addOperand(MI->getOperand(0))
295 .addOperand(MI->getOperand(1))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000296 .addImm(SrcX)
297 .addImm(SrcY)
298 .addImm(SrcZ)
299 .addImm(SrcW)
300 .addImm(0)
301 .addImm(0)
302 .addImm(0)
303 .addImm(0)
304 .addImm(1)
305 .addImm(2)
306 .addImm(3)
307 .addOperand(RID)
308 .addOperand(SID)
309 .addImm(CTX)
310 .addImm(CTY)
311 .addImm(CTZ)
312 .addImm(CTW)
Tom Stellard75aadc22012-12-11 21:25:42 +0000313 .addReg(T0, RegState::Implicit)
314 .addReg(T1, RegState::Implicit);
315 break;
316 }
317
318 case AMDGPU::TXD_SHADOW: {
319 unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
320 unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000321 MachineOperand &RID = MI->getOperand(4);
322 MachineOperand &SID = MI->getOperand(5);
323 unsigned TextureId = MI->getOperand(6).getImm();
324 unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
325 unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
326
327 switch (TextureId) {
328 case 5: // Rect
329 CTX = CTY = 0;
330 break;
331 case 6: // Shadow1D
332 SrcW = SrcZ;
333 break;
334 case 7: // Shadow2D
335 SrcW = SrcZ;
336 break;
337 case 8: // ShadowRect
338 CTX = CTY = 0;
339 SrcW = SrcZ;
340 break;
341 case 9: // 1DArray
342 SrcZ = SrcY;
343 CTZ = 0;
344 break;
345 case 10: // 2DArray
346 CTZ = 0;
347 break;
348 case 11: // Shadow1DArray
349 SrcZ = SrcY;
350 CTZ = 0;
351 break;
352 case 12: // Shadow2DArray
353 CTZ = 0;
354 break;
355 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000356
357 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
358 .addOperand(MI->getOperand(3))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000359 .addImm(SrcX)
360 .addImm(SrcY)
361 .addImm(SrcZ)
362 .addImm(SrcW)
363 .addImm(0)
364 .addImm(0)
365 .addImm(0)
366 .addImm(0)
367 .addImm(1)
368 .addImm(2)
369 .addImm(3)
370 .addOperand(RID)
371 .addOperand(SID)
372 .addImm(CTX)
373 .addImm(CTY)
374 .addImm(CTZ)
375 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000376 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
377 .addOperand(MI->getOperand(2))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000378 .addImm(SrcX)
379 .addImm(SrcY)
380 .addImm(SrcZ)
381 .addImm(SrcW)
382 .addImm(0)
383 .addImm(0)
384 .addImm(0)
385 .addImm(0)
386 .addImm(1)
387 .addImm(2)
388 .addImm(3)
389 .addOperand(RID)
390 .addOperand(SID)
391 .addImm(CTX)
392 .addImm(CTY)
393 .addImm(CTZ)
394 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000395 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
396 .addOperand(MI->getOperand(0))
397 .addOperand(MI->getOperand(1))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000398 .addImm(SrcX)
399 .addImm(SrcY)
400 .addImm(SrcZ)
401 .addImm(SrcW)
402 .addImm(0)
403 .addImm(0)
404 .addImm(0)
405 .addImm(0)
406 .addImm(1)
407 .addImm(2)
408 .addImm(3)
409 .addOperand(RID)
410 .addOperand(SID)
411 .addImm(CTX)
412 .addImm(CTY)
413 .addImm(CTZ)
414 .addImm(CTW)
Tom Stellard75aadc22012-12-11 21:25:42 +0000415 .addReg(T0, RegState::Implicit)
416 .addReg(T1, RegState::Implicit);
417 break;
418 }
419
420 case AMDGPU::BRANCH:
421 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000422 .addOperand(MI->getOperand(0));
Tom Stellard75aadc22012-12-11 21:25:42 +0000423 break;
424
425 case AMDGPU::BRANCH_COND_f32: {
426 MachineInstr *NewMI =
427 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
428 AMDGPU::PREDICATE_BIT)
429 .addOperand(MI->getOperand(1))
430 .addImm(OPCODE_IS_NOT_ZERO)
431 .addImm(0); // Flags
432 TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000433 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
Tom Stellard75aadc22012-12-11 21:25:42 +0000434 .addOperand(MI->getOperand(0))
435 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
436 break;
437 }
438
439 case AMDGPU::BRANCH_COND_i32: {
440 MachineInstr *NewMI =
441 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
442 AMDGPU::PREDICATE_BIT)
443 .addOperand(MI->getOperand(1))
444 .addImm(OPCODE_IS_NOT_ZERO_INT)
445 .addImm(0); // Flags
446 TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000447 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
Tom Stellard75aadc22012-12-11 21:25:42 +0000448 .addOperand(MI->getOperand(0))
449 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
450 break;
451 }
452
Tom Stellard75aadc22012-12-11 21:25:42 +0000453 case AMDGPU::EG_ExportSwz:
454 case AMDGPU::R600_ExportSwz: {
Tom Stellard6f1b8652013-01-23 21:39:49 +0000455 // Instruction is left unmodified if its not the last one of its type
456 bool isLastInstructionOfItsType = true;
457 unsigned InstExportType = MI->getOperand(1).getImm();
458 for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
459 EndBlock = BB->end(); NextExportInst != EndBlock;
460 NextExportInst = llvm::next(NextExportInst)) {
461 if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
462 NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
463 unsigned CurrentInstExportType = NextExportInst->getOperand(1)
464 .getImm();
465 if (CurrentInstExportType == InstExportType) {
466 isLastInstructionOfItsType = false;
467 break;
468 }
469 }
470 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000471 bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
Tom Stellard6f1b8652013-01-23 21:39:49 +0000472 if (!EOP && !isLastInstructionOfItsType)
Tom Stellard75aadc22012-12-11 21:25:42 +0000473 return BB;
474 unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
475 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
476 .addOperand(MI->getOperand(0))
477 .addOperand(MI->getOperand(1))
478 .addOperand(MI->getOperand(2))
479 .addOperand(MI->getOperand(3))
480 .addOperand(MI->getOperand(4))
481 .addOperand(MI->getOperand(5))
482 .addOperand(MI->getOperand(6))
483 .addImm(CfInst)
Tom Stellard6f1b8652013-01-23 21:39:49 +0000484 .addImm(EOP);
Tom Stellard75aadc22012-12-11 21:25:42 +0000485 break;
486 }
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000487 case AMDGPU::RETURN: {
488 // RETURN instructions must have the live-out registers as implicit uses,
489 // otherwise they appear dead.
490 R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
491 MachineInstrBuilder MIB(*MF, MI);
492 for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
493 MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
494 return BB;
495 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000496 }
497
498 MI->eraseFromParent();
499 return BB;
500}
501
502//===----------------------------------------------------------------------===//
503// Custom DAG Lowering Operations
504//===----------------------------------------------------------------------===//
505
Tom Stellard75aadc22012-12-11 21:25:42 +0000506SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000507 MachineFunction &MF = DAG.getMachineFunction();
508 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard75aadc22012-12-11 21:25:42 +0000509 switch (Op.getOpcode()) {
510 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000511 case ISD::FCOS:
512 case ISD::FSIN: return LowerTrig(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000513 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000514 case ISD::STORE: return LowerSTORE(Op, DAG);
Tom Stellard365366f2013-01-23 02:09:06 +0000515 case ISD::LOAD: return LowerLOAD(Op, DAG);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000516 case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000517 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000518 case ISD::INTRINSIC_VOID: {
519 SDValue Chain = Op.getOperand(0);
520 unsigned IntrinsicID =
521 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
522 switch (IntrinsicID) {
523 case AMDGPUIntrinsic::AMDGPU_store_output: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000524 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
525 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000526 MFI->LiveOuts.push_back(Reg);
Andrew Trickef9de2a2013-05-25 02:42:55 +0000527 return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
Tom Stellard75aadc22012-12-11 21:25:42 +0000528 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000529 case AMDGPUIntrinsic::R600_store_swizzle: {
530 const SDValue Args[8] = {
531 Chain,
532 Op.getOperand(2), // Export Value
533 Op.getOperand(3), // ArrayBase
534 Op.getOperand(4), // Type
535 DAG.getConstant(0, MVT::i32), // SWZ_X
536 DAG.getConstant(1, MVT::i32), // SWZ_Y
537 DAG.getConstant(2, MVT::i32), // SWZ_Z
538 DAG.getConstant(3, MVT::i32) // SWZ_W
539 };
Andrew Trickef9de2a2013-05-25 02:42:55 +0000540 return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(),
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000541 Args, 8);
Tom Stellard75aadc22012-12-11 21:25:42 +0000542 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000543
Tom Stellard75aadc22012-12-11 21:25:42 +0000544 // default for switch(IntrinsicID)
545 default: break;
546 }
547 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
548 break;
549 }
550 case ISD::INTRINSIC_WO_CHAIN: {
551 unsigned IntrinsicID =
552 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
553 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000554 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000555 switch(IntrinsicID) {
556 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
557 case AMDGPUIntrinsic::R600_load_input: {
558 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
559 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Vincent Lejeuned3fcb502013-05-17 16:51:06 +0000560 MachineFunction &MF = DAG.getMachineFunction();
561 MachineRegisterInfo &MRI = MF.getRegInfo();
562 MRI.addLiveIn(Reg);
563 return DAG.getCopyFromReg(DAG.getEntryNode(),
Andrew Trickef9de2a2013-05-25 02:42:55 +0000564 SDLoc(DAG.getEntryNode()), Reg, VT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000565 }
Tom Stellard41afe6a2013-02-05 17:09:14 +0000566
567 case AMDGPUIntrinsic::R600_interp_input: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000568 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Tom Stellard41afe6a2013-02-05 17:09:14 +0000569 int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
570 MachineSDNode *interp;
571 if (ijb < 0) {
Bill Wendling37e9adb2013-06-07 20:28:55 +0000572 const MachineFunction &MF = DAG.getMachineFunction();
573 const R600InstrInfo *TII =
574 static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
Tom Stellard41afe6a2013-02-05 17:09:14 +0000575 interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
576 MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
577 return DAG.getTargetExtractSubreg(
578 TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
579 DL, MVT::f32, SDValue(interp, 0));
580 }
581
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000582 MachineFunction &MF = DAG.getMachineFunction();
583 MachineRegisterInfo &MRI = MF.getRegInfo();
584 unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
585 unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
586 MRI.addLiveIn(RegisterI);
587 MRI.addLiveIn(RegisterJ);
588 SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
589 SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
590 SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
591 SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);
592
Tom Stellard41afe6a2013-02-05 17:09:14 +0000593 if (slot % 4 < 2)
594 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
595 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000596 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000597 else
598 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
599 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000600 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000601 return SDValue(interp, slot % 2);
Tom Stellard75aadc22012-12-11 21:25:42 +0000602 }
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000603 case AMDGPUIntrinsic::R600_tex:
604 case AMDGPUIntrinsic::R600_texc:
605 case AMDGPUIntrinsic::R600_txl:
606 case AMDGPUIntrinsic::R600_txlc:
607 case AMDGPUIntrinsic::R600_txb:
608 case AMDGPUIntrinsic::R600_txbc:
609 case AMDGPUIntrinsic::R600_txf:
610 case AMDGPUIntrinsic::R600_txq:
611 case AMDGPUIntrinsic::R600_ddx:
Vincent Lejeune6df39432013-10-02 16:00:33 +0000612 case AMDGPUIntrinsic::R600_ddy:
613 case AMDGPUIntrinsic::R600_ldptr: {
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000614 unsigned TextureOp;
615 switch (IntrinsicID) {
616 case AMDGPUIntrinsic::R600_tex:
617 TextureOp = 0;
618 break;
619 case AMDGPUIntrinsic::R600_texc:
620 TextureOp = 1;
621 break;
622 case AMDGPUIntrinsic::R600_txl:
623 TextureOp = 2;
624 break;
625 case AMDGPUIntrinsic::R600_txlc:
626 TextureOp = 3;
627 break;
628 case AMDGPUIntrinsic::R600_txb:
629 TextureOp = 4;
630 break;
631 case AMDGPUIntrinsic::R600_txbc:
632 TextureOp = 5;
633 break;
634 case AMDGPUIntrinsic::R600_txf:
635 TextureOp = 6;
636 break;
637 case AMDGPUIntrinsic::R600_txq:
638 TextureOp = 7;
639 break;
640 case AMDGPUIntrinsic::R600_ddx:
641 TextureOp = 8;
642 break;
643 case AMDGPUIntrinsic::R600_ddy:
644 TextureOp = 9;
645 break;
Vincent Lejeune6df39432013-10-02 16:00:33 +0000646 case AMDGPUIntrinsic::R600_ldptr:
647 TextureOp = 10;
648 break;
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000649 default:
650 llvm_unreachable("Unknow Texture Operation");
651 }
652
653 SDValue TexArgs[19] = {
654 DAG.getConstant(TextureOp, MVT::i32),
655 Op.getOperand(1),
656 DAG.getConstant(0, MVT::i32),
657 DAG.getConstant(1, MVT::i32),
658 DAG.getConstant(2, MVT::i32),
659 DAG.getConstant(3, MVT::i32),
660 Op.getOperand(2),
661 Op.getOperand(3),
662 Op.getOperand(4),
663 DAG.getConstant(0, MVT::i32),
664 DAG.getConstant(1, MVT::i32),
665 DAG.getConstant(2, MVT::i32),
666 DAG.getConstant(3, MVT::i32),
667 Op.getOperand(5),
668 Op.getOperand(6),
669 Op.getOperand(7),
670 Op.getOperand(8),
671 Op.getOperand(9),
672 Op.getOperand(10)
673 };
674 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
675 }
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000676 case AMDGPUIntrinsic::AMDGPU_dp4: {
677 SDValue Args[8] = {
678 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
679 DAG.getConstant(0, MVT::i32)),
680 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
681 DAG.getConstant(0, MVT::i32)),
682 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
683 DAG.getConstant(1, MVT::i32)),
684 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
685 DAG.getConstant(1, MVT::i32)),
686 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
687 DAG.getConstant(2, MVT::i32)),
688 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
689 DAG.getConstant(2, MVT::i32)),
690 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
691 DAG.getConstant(3, MVT::i32)),
692 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
693 DAG.getConstant(3, MVT::i32))
694 };
695 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8);
696 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000697
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000698 case Intrinsic::r600_read_ngroups_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000699 return LowerImplicitParameter(DAG, VT, DL, 0);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000700 case Intrinsic::r600_read_ngroups_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000701 return LowerImplicitParameter(DAG, VT, DL, 1);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000702 case Intrinsic::r600_read_ngroups_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000703 return LowerImplicitParameter(DAG, VT, DL, 2);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000704 case Intrinsic::r600_read_global_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000705 return LowerImplicitParameter(DAG, VT, DL, 3);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000706 case Intrinsic::r600_read_global_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000707 return LowerImplicitParameter(DAG, VT, DL, 4);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000708 case Intrinsic::r600_read_global_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000709 return LowerImplicitParameter(DAG, VT, DL, 5);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000710 case Intrinsic::r600_read_local_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000711 return LowerImplicitParameter(DAG, VT, DL, 6);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000712 case Intrinsic::r600_read_local_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000713 return LowerImplicitParameter(DAG, VT, DL, 7);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000714 case Intrinsic::r600_read_local_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000715 return LowerImplicitParameter(DAG, VT, DL, 8);
716
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000717 case Intrinsic::r600_read_tgid_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000718 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
719 AMDGPU::T1_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000720 case Intrinsic::r600_read_tgid_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000721 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
722 AMDGPU::T1_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000723 case Intrinsic::r600_read_tgid_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000724 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
725 AMDGPU::T1_Z, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000726 case Intrinsic::r600_read_tidig_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000727 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
728 AMDGPU::T0_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000729 case Intrinsic::r600_read_tidig_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000730 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
731 AMDGPU::T0_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000732 case Intrinsic::r600_read_tidig_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000733 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
734 AMDGPU::T0_Z, VT);
735 }
736 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
737 break;
738 }
739 } // end switch(Op.getOpcode())
740 return SDValue();
741}
742
743void R600TargetLowering::ReplaceNodeResults(SDNode *N,
744 SmallVectorImpl<SDValue> &Results,
745 SelectionDAG &DAG) const {
746 switch (N->getOpcode()) {
747 default: return;
748 case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
Tom Stellard365366f2013-01-23 02:09:06 +0000749 return;
750 case ISD::LOAD: {
751 SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
752 Results.push_back(SDValue(Node, 0));
753 Results.push_back(SDValue(Node, 1));
754 // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
755 // function
756 DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
757 return;
758 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000759 case ISD::STORE:
760 SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
761 Results.push_back(SDValue(Node, 0));
762 return;
Tom Stellard75aadc22012-12-11 21:25:42 +0000763 }
764}
765
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000766SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
767 // On hw >= R700, COS/SIN input must be between -1. and 1.
768 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
769 EVT VT = Op.getValueType();
770 SDValue Arg = Op.getOperand(0);
771 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
772 DAG.getNode(ISD::FADD, SDLoc(Op), VT,
773 DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
774 DAG.getConstantFP(0.15915494309, MVT::f32)),
775 DAG.getConstantFP(0.5, MVT::f32)));
776 unsigned TrigNode;
777 switch (Op.getOpcode()) {
778 case ISD::FCOS:
779 TrigNode = AMDGPUISD::COS_HW;
780 break;
781 case ISD::FSIN:
782 TrigNode = AMDGPUISD::SIN_HW;
783 break;
784 default:
785 llvm_unreachable("Wrong trig opcode");
786 }
787 SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
788 DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
789 DAG.getConstantFP(-0.5, MVT::f32)));
790 if (Gen >= AMDGPUSubtarget::R700)
791 return TrigVal;
792 // On R600 hw, COS/SIN input must be between -Pi and Pi.
793 return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
794 DAG.getConstantFP(3.14159265359, MVT::f32));
795}
796
Tom Stellard75aadc22012-12-11 21:25:42 +0000797SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
798 return DAG.getNode(
799 ISD::SETCC,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000800 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +0000801 MVT::i1,
802 Op, DAG.getConstantFP(0.0f, MVT::f32),
803 DAG.getCondCode(ISD::SETNE)
804 );
805}
806
Tom Stellard75aadc22012-12-11 21:25:42 +0000807SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000808 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +0000809 unsigned DwordOffset) const {
810 unsigned ByteOffset = DwordOffset * 4;
811 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +0000812 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +0000813
814 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
815 assert(isInt<16>(ByteOffset));
816
817 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
818 DAG.getConstant(ByteOffset, MVT::i32), // PTR
819 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
820 false, false, false, 0);
821}
822
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000823SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
824
825 MachineFunction &MF = DAG.getMachineFunction();
826 const AMDGPUFrameLowering *TFL =
827 static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
828
829 FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
830 assert(FIN);
831
832 unsigned FrameIndex = FIN->getIndex();
833 unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
834 return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
835}
836
Tom Stellard75aadc22012-12-11 21:25:42 +0000837bool R600TargetLowering::isZero(SDValue Op) const {
838 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
839 return Cst->isNullValue();
840 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
841 return CstFP->isZero();
842 } else {
843 return false;
844 }
845}
846
// Lower SELECT_CC to a native SET* or CND* pattern, a min/max pattern, or,
// when none of those match, a pair of supported SELECT_CC nodes.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.  If the select
  // arms are reversed (hardware-false in the True slot), invert the condition
  // code — or invert-and-swap the compare operands — so a SET* can match.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
     ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    // CC may have been rewritten above, so re-read the current condition code.
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the conditon and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        // Inverting the condition also requires swapping the select arms.
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types.  This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    // CND* has no !=-style form: rewrite SETNE-family predicates as their
    // inverse and exchange the select arms to compensate.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }


  // Possible Min/Max pattern
  SDValue MinMax = LowerMinMax(Op, DAG);
  if (MinMax.getNode()) {
    return MinMax;
  }

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  }
  else {
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
988
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000989/// LLVM generates byte-addresed pointers. For indirect addressing, we need to
990/// convert these pointers to a register index. Each register holds
991/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
992/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
993/// for indirect addressing.
994SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
995 unsigned StackWidth,
996 SelectionDAG &DAG) const {
997 unsigned SRLPad;
998 switch(StackWidth) {
999 case 1:
1000 SRLPad = 2;
1001 break;
1002 case 2:
1003 SRLPad = 3;
1004 break;
1005 case 4:
1006 SRLPad = 4;
1007 break;
1008 default: llvm_unreachable("Invalid stack width");
1009 }
1010
Andrew Trickef9de2a2013-05-25 02:42:55 +00001011 return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001012 DAG.getConstant(SRLPad, MVT::i32));
1013}
1014
1015void R600TargetLowering::getStackAddress(unsigned StackWidth,
1016 unsigned ElemIdx,
1017 unsigned &Channel,
1018 unsigned &PtrIncr) const {
1019 switch (StackWidth) {
1020 default:
1021 case 1:
1022 Channel = 0;
1023 if (ElemIdx > 0) {
1024 PtrIncr = 1;
1025 } else {
1026 PtrIncr = 0;
1027 }
1028 break;
1029 case 2:
1030 Channel = ElemIdx % 2;
1031 if (ElemIdx == 2) {
1032 PtrIncr = 1;
1033 } else {
1034 PtrIncr = 0;
1035 }
1036 break;
1037 case 4:
1038 Channel = ElemIdx;
1039 PtrIncr = 0;
1040 break;
1041 }
1042}
1043
// Custom store lowering: defer to the generic AMDGPU lowering first, then
// handle global-address truncating stores (as a masked-OR intrinsic) and
// dword-aligned global stores, and finally private-address stores via
// indirect REGISTER_STORE nodes.
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Ptr = Op.getOperand(2);

  // Let the shared AMDGPU lowering handle the cases it knows about.
  SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
  if (Result.getNode()) {
    return Result;
  }

  if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
    if (StoreNode->isTruncatingStore()) {
      // Global i8/i16 truncating store: emit a read-modify-write style
      // STORE_MSKOR with the value and mask shifted to the byte position
      // inside the containing dword.
      EVT VT = Value.getValueType();
      assert(VT.bitsLE(MVT::i32));
      EVT MemVT = StoreNode->getMemoryVT();
      SDValue MaskConstant;
      if (MemVT == MVT::i8) {
        MaskConstant = DAG.getConstant(0xFF, MVT::i32);
      } else {
        assert(MemVT == MVT::i16);
        MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
      }
      // DWordAddr = Ptr >> 2, ByteIndex = Ptr & 3: split the byte address
      // into the dword address and the byte position within that dword.
      SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
                                      DAG.getConstant(2, MVT::i32));
      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
                                      DAG.getConstant(0x00000003, VT));
      SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
      // Shift = ByteIndex * 8, i.e. the bit offset of the stored bytes.
      SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
                                  DAG.getConstant(3, VT));
      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
      // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
      // vector instead.
      SDValue Src[4] = {
        ShiftedValue,
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(0, MVT::i32),
        Mask
      };
      SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src, 4);
      SDValue Args[3] = { Chain, Input, DWordAddr };
      return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
                                     Op->getVTList(), Args, 3, MemVT,
                                     StoreNode->getMemOperand());
    } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
               Value.getValueType().bitsGE(MVT::i32)) {
      // Convert pointer from byte address to dword address.
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
                        DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
                                    Ptr, DAG.getConstant(2, MVT::i32)));

      if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
        assert(!"Truncated and indexed stores not supported yet");
      } else {
        Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
      }
      return Chain;
    }
  }

  EVT ValueVT = Value.getValueType();

  // Everything below handles only the private (scratch) address space.
  if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing

  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  // Convert the byte pointer into a register index (see stackPtrToRegIndex).
  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (ValueVT.isVector()) {
    unsigned NumElemVT = ValueVT.getVectorNumElements();
    EVT ElemVT = ValueVT.getVectorElementType();
    SDValue Stores[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    // Emit one REGISTER_STORE per vector element, advancing the register
    // pointer / channel according to the stack width.
    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
                                 Value, DAG.getConstant(i, MVT::i32));

      Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
                              Chain, Elem, Ptr,
                              DAG.getTargetConstant(Channel, MVT::i32));
    }
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
  } else {
    if (ValueVT == MVT::i8) {
      Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
    }
    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
                        DAG.getTargetConstant(0, MVT::i32)); // Channel
  }

  return Chain;
}
1152
Tom Stellard365366f2013-01-23 02:09:06 +00001153// return (512 + (kc_bank << 12)
1154static int
1155ConstantAddressBlock(unsigned AddressSpace) {
1156 switch (AddressSpace) {
1157 case AMDGPUAS::CONSTANT_BUFFER_0:
1158 return 512;
1159 case AMDGPUAS::CONSTANT_BUFFER_1:
1160 return 512 + 4096;
1161 case AMDGPUAS::CONSTANT_BUFFER_2:
1162 return 512 + 4096 * 2;
1163 case AMDGPUAS::CONSTANT_BUFFER_3:
1164 return 512 + 4096 * 3;
1165 case AMDGPUAS::CONSTANT_BUFFER_4:
1166 return 512 + 4096 * 4;
1167 case AMDGPUAS::CONSTANT_BUFFER_5:
1168 return 512 + 4096 * 5;
1169 case AMDGPUAS::CONSTANT_BUFFER_6:
1170 return 512 + 4096 * 6;
1171 case AMDGPUAS::CONSTANT_BUFFER_7:
1172 return 512 + 4096 * 7;
1173 case AMDGPUAS::CONSTANT_BUFFER_8:
1174 return 512 + 4096 * 8;
1175 case AMDGPUAS::CONSTANT_BUFFER_9:
1176 return 512 + 4096 * 9;
1177 case AMDGPUAS::CONSTANT_BUFFER_10:
1178 return 512 + 4096 * 10;
1179 case AMDGPUAS::CONSTANT_BUFFER_11:
1180 return 512 + 4096 * 11;
1181 case AMDGPUAS::CONSTANT_BUFFER_12:
1182 return 512 + 4096 * 12;
1183 case AMDGPUAS::CONSTANT_BUFFER_13:
1184 return 512 + 4096 * 13;
1185 case AMDGPUAS::CONSTANT_BUFFER_14:
1186 return 512 + 4096 * 14;
1187 case AMDGPUAS::CONSTANT_BUFFER_15:
1188 return 512 + 4096 * 15;
1189 default:
1190 return -1;
1191 }
1192}
1193
// Custom load lowering: split local vector loads, fold constant-buffer loads
// into CONST_ADDRESS nodes, manually expand SEXT loads, and lower
// private-address loads to indirect REGISTER_LOAD nodes.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  // Vector loads from local memory are split into scalar loads.
  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      SplitVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1) {
    SDValue Result;
    // A constant (or constant-expression) pointer can be folded into the
    // CONST_ADDRESS encoding directly; otherwise keep it as a dynamic fetch.
    if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
        dyn_cast<Constant>(LoadNode->getSrcValue()) ||
        dyn_cast<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT, Slots, NumElements);
    } else {
      // non constant ptr cant be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    // Scalar loads only need element 0 of the fetched vector.
    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so
  // we need to manually expand loads that may be legal in some address spaces
  // and illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported
  // for compute shaders, since the data is sign extended when it is uploaded
  // to the buffer. However SEXT loads from other address spaces are not
  // supported, so we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    // Emit a plain extload, then sign-extend in registers with a SHL/SRA pair.
    SDValue ShiftAmount =
          DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                  LoadNode->getPointerInfo(), MemVT,
                                  LoadNode->isVolatile(),
                                  LoadNode->isNonTemporal(),
                                  LoadNode->getAlignment());
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);

    SDValue MergedValues[2] = { Sra, Chain };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  // Everything below handles only the private (scratch) address space.
  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  // Convert the byte pointer into a register index (see stackPtrToRegIndex).
  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    // Emit one REGISTER_LOAD per element, advancing the register pointer /
    // channel according to the stack width.
    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, MVT::i32),
                             Op.getOperand(2));
    }
    // Pad the result out to a full 4-element vector with undefs.
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2];
  Ops[0] = LoweredLoad;
  Ops[1] = Chain;

  return DAG.getMergeValues(Ops, 2, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001328
Tom Stellard75aadc22012-12-11 21:25:42 +00001329/// XXX Only kernel functions are supported, so we can assume for now that
1330/// every function is a kernel function, but in the future we should use
1331/// separate calling conventions for kernel and non-kernel functions.
1332SDValue R600TargetLowering::LowerFormalArguments(
1333 SDValue Chain,
1334 CallingConv::ID CallConv,
1335 bool isVarArg,
1336 const SmallVectorImpl<ISD::InputArg> &Ins,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001337 SDLoc DL, SelectionDAG &DAG,
Tom Stellard75aadc22012-12-11 21:25:42 +00001338 SmallVectorImpl<SDValue> &InVals) const {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001339 SmallVector<CCValAssign, 16> ArgLocs;
1340 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
1341 getTargetMachine(), ArgLocs, *DAG.getContext());
1342
1343 AnalyzeFormalArguments(CCInfo, Ins);
1344
Tom Stellard1e803092013-07-23 01:48:18 +00001345 for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001346 CCValAssign &VA = ArgLocs[i];
1347 EVT VT = VA.getLocVT();
Tom Stellard78e01292013-07-23 01:47:58 +00001348
Tom Stellard75aadc22012-12-11 21:25:42 +00001349 PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +00001350 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellardacfeebf2013-07-23 01:48:05 +00001351
1352 // The first 36 bytes of the input buffer contains information about
1353 // thread group and global sizes.
Tom Stellard1e803092013-07-23 01:48:18 +00001354 SDValue Arg = DAG.getLoad(VT, DL, Chain,
Tom Stellardacfeebf2013-07-23 01:48:05 +00001355 DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
Tom Stellard1e803092013-07-23 01:48:18 +00001356 MachinePointerInfo(UndefValue::get(PtrTy)), false,
1357 false, false, 4); // 4 is the prefered alignment for
1358 // the CONSTANT memory space.
Tom Stellard75aadc22012-12-11 21:25:42 +00001359 InVals.push_back(Arg);
Tom Stellard75aadc22012-12-11 21:25:42 +00001360 }
1361 return Chain;
1362}
1363
Matt Arsenault758659232013-05-18 00:21:46 +00001364EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Tom Stellard75aadc22012-12-11 21:25:42 +00001365 if (!VT.isVector()) return MVT::i32;
1366 return VT.changeVectorElementTypeToInteger();
1367}
1368
// Canonicalize a 4-element BUILD_VECTOR used as a swizzle source: mark undef
// lanes as SEL_MASK_WRITE, fold 0.0/1.0 constants to the SEL_0/SEL_1
// selectors, and point duplicated lanes at their first occurrence.
// \p RemapSwizzle receives the resulting old-lane -> new-selector mapping.
static SDValue
CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
                        DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      // We mask write here to teach later passes that the ith element of this
      // vector is undef. Thus we can use it to reduce 128 bits reg usage,
      // break false dependencies and additionally make assembly easier to read.
      RemapSwizzle[i] = 7; // SEL_MASK_WRITE
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      // Constant 0.0 / 1.0 lanes are representable directly by the hardware
      // swizzle selectors, so the lane itself becomes undef.
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    // Undef lanes (original or just created above) take no part in the
    // duplicate scan below.
    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      continue;
    // If this lane repeats an earlier one, reference the earlier lane and
    // drop the duplicate.
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec, 4);
}
1411
// Try to place extract_vector_elt lanes at the position matching their
// source element index, so the resulting swizzle becomes closer to the
// identity.  Lanes already in their natural position are pinned; at most one
// swap is performed (the loop breaks after swapping).  \p RemapSwizzle is
// initialized to the identity and records the final lane permutation.
static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
                                DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };
  bool isUnmovable[4] = { false, false, false, false };
  for (unsigned i = 0; i < 4; i++)
    RemapSwizzle[i] = i;

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      // NOTE(review): assumes the extract index is always a constant —
      // dyn_cast would return null otherwise; cast<> would make that
      // assumption explicit.
      unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
          ->getZExtValue();
      if (i == Idx) {
        // Lane already reads from its natural source slot; pin it.
        isUnmovable[Idx] = true;
        continue;
      }
      if (isUnmovable[Idx])
        continue;
      // Swap i and Idx
      std::swap(NewBldVec[Idx], NewBldVec[i]);
      std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
      break;
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec, 4);
}
1446
1447
1448SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1449SDValue Swz[4], SelectionDAG &DAG) const {
1450 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1451 // Old -> New swizzle values
1452 DenseMap<unsigned, unsigned> SwizzleRemap;
1453
1454 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1455 for (unsigned i = 0; i < 4; i++) {
1456 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1457 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1458 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1459 }
1460
1461 SwizzleRemap.clear();
1462 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1463 for (unsigned i = 0; i < 4; i++) {
1464 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1465 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1466 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1467 }
1468
1469 return BuildVector;
1470}
1471
1472
Tom Stellard75aadc22012-12-11 21:25:42 +00001473//===----------------------------------------------------------------------===//
1474// Custom DAG Optimizations
1475//===----------------------------------------------------------------------===//
1476
1477SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1478 DAGCombinerInfo &DCI) const {
1479 SelectionDAG &DAG = DCI.DAG;
1480
1481 switch (N->getOpcode()) {
1482 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1483 case ISD::FP_ROUND: {
1484 SDValue Arg = N->getOperand(0);
1485 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001486 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001487 Arg.getOperand(0));
1488 }
1489 break;
1490 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001491
1492 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1493 // (i32 select_cc f32, f32, -1, 0 cc)
1494 //
1495 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1496 // this to one of the SET*_DX10 instructions.
1497 case ISD::FP_TO_SINT: {
1498 SDValue FNeg = N->getOperand(0);
1499 if (FNeg.getOpcode() != ISD::FNEG) {
1500 return SDValue();
1501 }
1502 SDValue SelectCC = FNeg.getOperand(0);
1503 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1504 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1505 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1506 !isHWTrueValue(SelectCC.getOperand(2)) ||
1507 !isHWFalseValue(SelectCC.getOperand(3))) {
1508 return SDValue();
1509 }
1510
Andrew Trickef9de2a2013-05-25 02:42:55 +00001511 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001512 SelectCC.getOperand(0), // LHS
1513 SelectCC.getOperand(1), // RHS
1514 DAG.getConstant(-1, MVT::i32), // True
1515 DAG.getConstant(0, MVT::i32), // Flase
1516 SelectCC.getOperand(4)); // CC
1517
1518 break;
1519 }
Quentin Colombete2e05482013-07-30 00:27:16 +00001520
1521 // insert_vector_elt (build_vector elt0, …, eltN), NewEltIdx, idx
1522 // => build_vector elt0, …, NewEltIdx, …, eltN
1523 case ISD::INSERT_VECTOR_ELT: {
1524 SDValue InVec = N->getOperand(0);
1525 SDValue InVal = N->getOperand(1);
1526 SDValue EltNo = N->getOperand(2);
1527 SDLoc dl(N);
1528
1529 // If the inserted element is an UNDEF, just use the input vector.
1530 if (InVal.getOpcode() == ISD::UNDEF)
1531 return InVec;
1532
1533 EVT VT = InVec.getValueType();
1534
1535 // If we can't generate a legal BUILD_VECTOR, exit
1536 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1537 return SDValue();
1538
1539 // Check that we know which element is being inserted
1540 if (!isa<ConstantSDNode>(EltNo))
1541 return SDValue();
1542 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1543
1544 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1545 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1546 // vector elements.
1547 SmallVector<SDValue, 8> Ops;
1548 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1549 Ops.append(InVec.getNode()->op_begin(),
1550 InVec.getNode()->op_end());
1551 } else if (InVec.getOpcode() == ISD::UNDEF) {
1552 unsigned NElts = VT.getVectorNumElements();
1553 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1554 } else {
1555 return SDValue();
1556 }
1557
1558 // Insert the element
1559 if (Elt < Ops.size()) {
1560 // All the operands of BUILD_VECTOR must have the same type;
1561 // we enforce that here.
1562 EVT OpVT = Ops[0].getValueType();
1563 if (InVal.getValueType() != OpVT)
1564 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1565 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
1566 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
1567 Ops[Elt] = InVal;
1568 }
1569
1570 // Return the new vector
1571 return DAG.getNode(ISD::BUILD_VECTOR, dl,
1572 VT, &Ops[0], Ops.size());
1573 }
1574
Tom Stellard365366f2013-01-23 02:09:06 +00001575 // Extract_vec (Build_vector) generated by custom lowering
1576 // also needs to be customly combined
1577 case ISD::EXTRACT_VECTOR_ELT: {
1578 SDValue Arg = N->getOperand(0);
1579 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1580 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1581 unsigned Element = Const->getZExtValue();
1582 return Arg->getOperand(Element);
1583 }
1584 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001585 if (Arg.getOpcode() == ISD::BITCAST &&
1586 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1587 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1588 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001589 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00001590 Arg->getOperand(0).getOperand(Element));
1591 }
1592 }
Tom Stellard365366f2013-01-23 02:09:06 +00001593 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001594
1595 case ISD::SELECT_CC: {
1596 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1597 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001598 //
1599 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1600 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001601 SDValue LHS = N->getOperand(0);
1602 if (LHS.getOpcode() != ISD::SELECT_CC) {
1603 return SDValue();
1604 }
1605
1606 SDValue RHS = N->getOperand(1);
1607 SDValue True = N->getOperand(2);
1608 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001609 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001610
1611 if (LHS.getOperand(2).getNode() != True.getNode() ||
1612 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00001613 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00001614 return SDValue();
1615 }
1616
Tom Stellard5e524892013-03-08 15:37:11 +00001617 switch (NCC) {
1618 default: return SDValue();
1619 case ISD::SETNE: return LHS;
1620 case ISD::SETEQ: {
1621 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1622 LHSCC = ISD::getSetCCInverse(LHSCC,
1623 LHS.getOperand(0).getValueType().isInteger());
Tom Stellardcd428182013-09-28 02:50:38 +00001624 if (DCI.isBeforeLegalizeOps() ||
1625 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1626 return DAG.getSelectCC(SDLoc(N),
1627 LHS.getOperand(0),
1628 LHS.getOperand(1),
1629 LHS.getOperand(2),
1630 LHS.getOperand(3),
1631 LHSCC);
1632 break;
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001633 }
Tom Stellard5e524892013-03-08 15:37:11 +00001634 }
Tom Stellardcd428182013-09-28 02:50:38 +00001635 return SDValue();
Tom Stellard5e524892013-03-08 15:37:11 +00001636 }
Tom Stellardfbab8272013-08-16 01:12:11 +00001637
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001638 case AMDGPUISD::EXPORT: {
1639 SDValue Arg = N->getOperand(1);
1640 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1641 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001642
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001643 SDValue NewArgs[8] = {
1644 N->getOperand(0), // Chain
1645 SDValue(),
1646 N->getOperand(2), // ArrayBase
1647 N->getOperand(3), // Type
1648 N->getOperand(4), // SWZ_X
1649 N->getOperand(5), // SWZ_Y
1650 N->getOperand(6), // SWZ_Z
1651 N->getOperand(7) // SWZ_W
1652 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00001653 SDLoc DL(N);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001654 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001655 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
Tom Stellarde06163a2013-02-07 14:02:35 +00001656 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001657 case AMDGPUISD::TEXTURE_FETCH: {
1658 SDValue Arg = N->getOperand(1);
1659 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1660 break;
1661
1662 SDValue NewArgs[19] = {
1663 N->getOperand(0),
1664 N->getOperand(1),
1665 N->getOperand(2),
1666 N->getOperand(3),
1667 N->getOperand(4),
1668 N->getOperand(5),
1669 N->getOperand(6),
1670 N->getOperand(7),
1671 N->getOperand(8),
1672 N->getOperand(9),
1673 N->getOperand(10),
1674 N->getOperand(11),
1675 N->getOperand(12),
1676 N->getOperand(13),
1677 N->getOperand(14),
1678 N->getOperand(15),
1679 N->getOperand(16),
1680 N->getOperand(17),
1681 N->getOperand(18),
1682 };
1683 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
1684 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
1685 NewArgs, 19);
1686 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001687 }
1688 return SDValue();
1689}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001690
1691static bool
1692FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001693 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001694 const R600InstrInfo *TII =
1695 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
1696 if (!Src.isMachineOpcode())
1697 return false;
1698 switch (Src.getMachineOpcode()) {
1699 case AMDGPU::FNEG_R600:
1700 if (!Neg.getNode())
1701 return false;
1702 Src = Src.getOperand(0);
1703 Neg = DAG.getTargetConstant(1, MVT::i32);
1704 return true;
1705 case AMDGPU::FABS_R600:
1706 if (!Abs.getNode())
1707 return false;
1708 Src = Src.getOperand(0);
1709 Abs = DAG.getTargetConstant(1, MVT::i32);
1710 return true;
1711 case AMDGPU::CONST_COPY: {
1712 unsigned Opcode = ParentNode->getMachineOpcode();
1713 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1714
1715 if (!Sel.getNode())
1716 return false;
1717
1718 SDValue CstOffset = Src.getOperand(0);
1719 if (ParentNode->getValueType(0).isVector())
1720 return false;
1721
1722 // Gather constants values
1723 int SrcIndices[] = {
1724 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
1725 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
1726 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
1727 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1728 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1729 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1730 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
1731 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
1732 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
1733 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
1734 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
1735 };
1736 std::vector<unsigned> Consts;
1737 for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
1738 int OtherSrcIdx = SrcIndices[i];
1739 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
1740 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
1741 continue;
1742 if (HasDst) {
1743 OtherSrcIdx--;
1744 OtherSelIdx--;
1745 }
1746 if (RegisterSDNode *Reg =
1747 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
1748 if (Reg->getReg() == AMDGPU::ALU_CONST) {
1749 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(
1750 ParentNode->getOperand(OtherSelIdx));
1751 Consts.push_back(Cst->getZExtValue());
1752 }
1753 }
1754 }
1755
1756 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
1757 Consts.push_back(Cst->getZExtValue());
1758 if (!TII->fitsConstReadLimitations(Consts)) {
1759 return false;
1760 }
1761
1762 Sel = CstOffset;
1763 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
1764 return true;
1765 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001766 case AMDGPU::MOV_IMM_I32:
1767 case AMDGPU::MOV_IMM_F32: {
1768 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
1769 uint64_t ImmValue = 0;
1770
1771
1772 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
1773 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
1774 float FloatValue = FPC->getValueAPF().convertToFloat();
1775 if (FloatValue == 0.0) {
1776 ImmReg = AMDGPU::ZERO;
1777 } else if (FloatValue == 0.5) {
1778 ImmReg = AMDGPU::HALF;
1779 } else if (FloatValue == 1.0) {
1780 ImmReg = AMDGPU::ONE;
1781 } else {
1782 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
1783 }
1784 } else {
1785 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
1786 uint64_t Value = C->getZExtValue();
1787 if (Value == 0) {
1788 ImmReg = AMDGPU::ZERO;
1789 } else if (Value == 1) {
1790 ImmReg = AMDGPU::ONE_INT;
1791 } else {
1792 ImmValue = Value;
1793 }
1794 }
1795
1796 // Check that we aren't already using an immediate.
1797 // XXX: It's possible for an instruction to have more than one
1798 // immediate operand, but this is not supported yet.
1799 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
1800 if (!Imm.getNode())
1801 return false;
1802 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
1803 assert(C);
1804 if (C->getZExtValue())
1805 return false;
1806 Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
1807 }
1808 Src = DAG.getRegister(ImmReg, MVT::i32);
1809 return true;
1810 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001811 default:
1812 return false;
1813 }
1814}
1815
1816
/// \brief Fold the instructions after selecting them
///
/// Walks the operands of the selected machine node \p Node and uses
/// FoldOperand() to absorb FNEG/FABS, constant-buffer copies and inline
/// immediates into the node's source-modifier operands. Whenever a fold
/// succeeds, a fresh machine node with the rewritten operand list is
/// returned; otherwise \p Node is returned unchanged.
SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
                                            SelectionDAG &DAG) const {
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
  if (!Node->isMachineOpcode())
    return Node;
  unsigned Opcode = Node->getMachineOpcode();
  // FakeOp stands in for modifiers an instruction form does not have; a
  // null SDValue tells FoldOperand() that folding into it is not allowed.
  SDValue FakeOp;

  // Mutable copy of the operand list that FoldOperand() rewrites in place.
  std::vector<SDValue> Ops;
  for(SDNode::op_iterator I = Node->op_begin(), E = Node->op_end();
      I != E; ++I)
          Ops.push_back(*I);

  if (Opcode == AMDGPU::DOT_4) {
    // DOT_4 has eight scalar sources (src0/src1 x XYZW), each with its own
    // neg/abs modifier; the three index tables below run in parallel.
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
    };
    for (unsigned i = 0; i < 8; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      // MachineInstr operand index N maps to SDNode operand N-1 (the dst
      // operand is not part of the SDNode operand list).
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue &Abs = Ops[AbsIdx[i] - 1];
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      if (HasDst)
        SelIdx--;
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      // DOT_4 has no literal slot, so pass FakeOp for Imm.
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::REG_SEQUENCE) {
    // REG_SEQUENCE operands come in (value, subreg-index) pairs starting at
    // operand 1; only the values can be folded, and only const copies
    // (no neg/abs/literal modifiers exist here).
    for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
      SDValue &Src = Ops[i];
      if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::CLAMP_R600) {
    // Fold CLAMP into the defining instruction's clamp modifier bit when
    // that instruction supports output modifiers.
    SDValue Src = Node->getOperand(0);
    if (!Src.isMachineOpcode() ||
        !TII->hasInstrModifiers(Src.getMachineOpcode()))
      return Node;
    int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
        AMDGPU::OpName::clamp);
    if (ClampIdx < 0)
      return Node;
    // Rebuild the source instruction with its clamp operand set to 1,
    // replacing the separate CLAMP_R600 node entirely.
    std::vector<SDValue> Ops;
    unsigned NumOp = Src.getNumOperands();
    for(unsigned i = 0; i < NumOp; ++i)
          Ops.push_back(Src.getOperand(i));
    Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
    return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
        Node->getVTList(), Ops);
  } else {
    // Generic single-slot ALU instruction: up to three sources, each with
    // optional neg (and, for src0/src1, abs) modifiers plus one shared
    // literal slot.
    if (!TII->hasInstrModifiers(Opcode))
      return Node;
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
      -1  // src2 has no abs modifier on this hardware
    };
    for (unsigned i = 0; i < 3; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue FakeAbs;
      SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
      if (HasDst) {
        // Skip the dst operand, which is absent from the SDNode's list.
        SelIdx--;
        ImmIdx--;
      }
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      SDValue &Imm = Ops[ImmIdx];
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  }

  return Node;
}