blob: 62577eabf998370e6c58c7f1518880f1abaf3bf0 [file] [log] [blame]
Tom Stellard75aadc22012-12-11 21:25:42 +00001//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Custom DAG lowering for R600
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
16#include "R600Defines.h"
17#include "R600InstrInfo.h"
18#include "R600MachineFunctionInfo.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000019#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000020#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000021#include "llvm/CodeGen/MachineInstrBuilder.h"
22#include "llvm/CodeGen/MachineRegisterInfo.h"
23#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000024#include "llvm/IR/Argument.h"
25#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000026
27using namespace llvm;
28
29R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000030 AMDGPUTargetLowering(TM),
31 Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000032 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
33 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
34 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
35 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000036 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
37 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
38
Tom Stellard75aadc22012-12-11 21:25:42 +000039 computeRegisterProperties();
40
Tom Stellard0351ea22013-09-28 02:50:50 +000041 // Set condition code actions
42 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
43 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000044 setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000045 setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000046 setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
47 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000048 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
49 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
50 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
51 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000052 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
53 setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
54
55 setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
56 setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
57 setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
58 setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
59
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000060 setOperationAction(ISD::FCOS, MVT::f32, Custom);
61 setOperationAction(ISD::FSIN, MVT::f32, Custom);
62
Tom Stellard75aadc22012-12-11 21:25:42 +000063 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000064 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000065
Tom Stellard492ebea2013-03-08 15:37:07 +000066 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
67 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000068
69 setOperationAction(ISD::FSUB, MVT::f32, Expand);
70
71 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
72 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
73 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000074
Tom Stellard75aadc22012-12-11 21:25:42 +000075 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
76 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
77
Tom Stellarde8f9f282013-03-08 15:37:05 +000078 setOperationAction(ISD::SETCC, MVT::i32, Expand);
79 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000080 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
81
Tom Stellard53f2f902013-09-05 18:38:03 +000082 setOperationAction(ISD::SELECT, MVT::i32, Expand);
83 setOperationAction(ISD::SELECT, MVT::f32, Expand);
84 setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
85 setOperationAction(ISD::SELECT, MVT::v2f32, Expand);
86 setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
87 setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000088
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000089 // Legalize loads and stores to the private address space.
90 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +000091 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000092 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellard1e803092013-07-23 01:48:18 +000093 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
94 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
95 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
96 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000097 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000098 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +000099 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000100 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
Tom Stellardd3ee8c12013-08-16 01:12:06 +0000101 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
102 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000103
Tom Stellard365366f2013-01-23 02:09:06 +0000104 setOperationAction(ISD::LOAD, MVT::i32, Custom);
105 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000106 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
107
Tom Stellard75aadc22012-12-11 21:25:42 +0000108 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +0000109 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +0000110 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +0000111 setTargetDAGCombine(ISD::SELECT_CC);
Quentin Colombete2e05482013-07-30 00:27:16 +0000112 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000113
Michel Danzer49812b52013-07-10 16:37:07 +0000114 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
115
Tom Stellardb852af52013-03-08 15:37:03 +0000116 setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard87047f62013-04-24 23:56:18 +0000117 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
Tom Stellardfc455472013-08-12 22:33:21 +0000118 setSchedulingPreference(Sched::Source);
Tom Stellard75aadc22012-12-11 21:25:42 +0000119}
120
121MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
122 MachineInstr * MI, MachineBasicBlock * BB) const {
123 MachineFunction * MF = BB->getParent();
124 MachineRegisterInfo &MRI = MF->getRegInfo();
125 MachineBasicBlock::iterator I = *MI;
Bill Wendling37e9adb2013-06-07 20:28:55 +0000126 const R600InstrInfo *TII =
127 static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());
Tom Stellard75aadc22012-12-11 21:25:42 +0000128
129 switch (MI->getOpcode()) {
Tom Stellardc6f4a292013-08-26 15:05:59 +0000130 default:
Tom Stellard13c68ef2013-09-05 18:38:09 +0000131 if (TII->isLDSInstr(MI->getOpcode()) &&
132 TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst) != -1) {
133 int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
134 assert(DstIdx != -1);
135 MachineInstrBuilder NewMI;
136 if (!MRI.use_empty(MI->getOperand(DstIdx).getReg())) {
137 NewMI = BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()),
138 AMDGPU::OQAP);
139 TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV,
140 MI->getOperand(0).getReg(),
141 AMDGPU::OQAP);
142 } else {
143 NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
144 TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
145 }
Tom Stellardc6f4a292013-08-26 15:05:59 +0000146 for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
147 NewMI.addOperand(MI->getOperand(i));
148 }
Tom Stellardc6f4a292013-08-26 15:05:59 +0000149 } else {
150 return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
151 }
152 break;
Tom Stellard75aadc22012-12-11 21:25:42 +0000153 case AMDGPU::CLAMP_R600: {
154 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
155 AMDGPU::MOV,
156 MI->getOperand(0).getReg(),
157 MI->getOperand(1).getReg());
158 TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
159 break;
160 }
161
162 case AMDGPU::FABS_R600: {
163 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
164 AMDGPU::MOV,
165 MI->getOperand(0).getReg(),
166 MI->getOperand(1).getReg());
167 TII->addFlag(NewMI, 0, MO_FLAG_ABS);
168 break;
169 }
170
171 case AMDGPU::FNEG_R600: {
172 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
173 AMDGPU::MOV,
174 MI->getOperand(0).getReg(),
175 MI->getOperand(1).getReg());
176 TII->addFlag(NewMI, 0, MO_FLAG_NEG);
177 break;
178 }
179
Tom Stellard75aadc22012-12-11 21:25:42 +0000180 case AMDGPU::MASK_WRITE: {
181 unsigned maskedRegister = MI->getOperand(0).getReg();
182 assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
183 MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
184 TII->addFlag(defInstr, 0, MO_FLAG_MASK);
185 break;
186 }
187
188 case AMDGPU::MOV_IMM_F32:
189 TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
190 MI->getOperand(1).getFPImm()->getValueAPF()
191 .bitcastToAPInt().getZExtValue());
192 break;
193 case AMDGPU::MOV_IMM_I32:
194 TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
195 MI->getOperand(1).getImm());
196 break;
Vincent Lejeune0b72f102013-03-05 15:04:55 +0000197 case AMDGPU::CONST_COPY: {
198 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
199 MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
Tom Stellard02661d92013-06-25 21:22:18 +0000200 TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
Vincent Lejeune0b72f102013-03-05 15:04:55 +0000201 MI->getOperand(1).getImm());
202 break;
203 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000204
205 case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
Tom Stellard0344cdf2013-08-01 15:23:42 +0000206 case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
Tom Stellard75aadc22012-12-11 21:25:42 +0000207 case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
208 unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
209
210 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
211 .addOperand(MI->getOperand(0))
212 .addOperand(MI->getOperand(1))
213 .addImm(EOP); // Set End of program bit
214 break;
215 }
216
Tom Stellard75aadc22012-12-11 21:25:42 +0000217 case AMDGPU::TXD: {
218 unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
219 unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000220 MachineOperand &RID = MI->getOperand(4);
221 MachineOperand &SID = MI->getOperand(5);
222 unsigned TextureId = MI->getOperand(6).getImm();
223 unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
224 unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
Tom Stellard75aadc22012-12-11 21:25:42 +0000225
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000226 switch (TextureId) {
227 case 5: // Rect
228 CTX = CTY = 0;
229 break;
230 case 6: // Shadow1D
231 SrcW = SrcZ;
232 break;
233 case 7: // Shadow2D
234 SrcW = SrcZ;
235 break;
236 case 8: // ShadowRect
237 CTX = CTY = 0;
238 SrcW = SrcZ;
239 break;
240 case 9: // 1DArray
241 SrcZ = SrcY;
242 CTZ = 0;
243 break;
244 case 10: // 2DArray
245 CTZ = 0;
246 break;
247 case 11: // Shadow1DArray
248 SrcZ = SrcY;
249 CTZ = 0;
250 break;
251 case 12: // Shadow2DArray
252 CTZ = 0;
253 break;
254 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000255 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
256 .addOperand(MI->getOperand(3))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000257 .addImm(SrcX)
258 .addImm(SrcY)
259 .addImm(SrcZ)
260 .addImm(SrcW)
261 .addImm(0)
262 .addImm(0)
263 .addImm(0)
264 .addImm(0)
265 .addImm(1)
266 .addImm(2)
267 .addImm(3)
268 .addOperand(RID)
269 .addOperand(SID)
270 .addImm(CTX)
271 .addImm(CTY)
272 .addImm(CTZ)
273 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000274 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
275 .addOperand(MI->getOperand(2))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000276 .addImm(SrcX)
277 .addImm(SrcY)
278 .addImm(SrcZ)
279 .addImm(SrcW)
280 .addImm(0)
281 .addImm(0)
282 .addImm(0)
283 .addImm(0)
284 .addImm(1)
285 .addImm(2)
286 .addImm(3)
287 .addOperand(RID)
288 .addOperand(SID)
289 .addImm(CTX)
290 .addImm(CTY)
291 .addImm(CTZ)
292 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000293 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
294 .addOperand(MI->getOperand(0))
295 .addOperand(MI->getOperand(1))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000296 .addImm(SrcX)
297 .addImm(SrcY)
298 .addImm(SrcZ)
299 .addImm(SrcW)
300 .addImm(0)
301 .addImm(0)
302 .addImm(0)
303 .addImm(0)
304 .addImm(1)
305 .addImm(2)
306 .addImm(3)
307 .addOperand(RID)
308 .addOperand(SID)
309 .addImm(CTX)
310 .addImm(CTY)
311 .addImm(CTZ)
312 .addImm(CTW)
Tom Stellard75aadc22012-12-11 21:25:42 +0000313 .addReg(T0, RegState::Implicit)
314 .addReg(T1, RegState::Implicit);
315 break;
316 }
317
318 case AMDGPU::TXD_SHADOW: {
319 unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
320 unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000321 MachineOperand &RID = MI->getOperand(4);
322 MachineOperand &SID = MI->getOperand(5);
323 unsigned TextureId = MI->getOperand(6).getImm();
324 unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
325 unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
326
327 switch (TextureId) {
328 case 5: // Rect
329 CTX = CTY = 0;
330 break;
331 case 6: // Shadow1D
332 SrcW = SrcZ;
333 break;
334 case 7: // Shadow2D
335 SrcW = SrcZ;
336 break;
337 case 8: // ShadowRect
338 CTX = CTY = 0;
339 SrcW = SrcZ;
340 break;
341 case 9: // 1DArray
342 SrcZ = SrcY;
343 CTZ = 0;
344 break;
345 case 10: // 2DArray
346 CTZ = 0;
347 break;
348 case 11: // Shadow1DArray
349 SrcZ = SrcY;
350 CTZ = 0;
351 break;
352 case 12: // Shadow2DArray
353 CTZ = 0;
354 break;
355 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000356
357 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
358 .addOperand(MI->getOperand(3))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000359 .addImm(SrcX)
360 .addImm(SrcY)
361 .addImm(SrcZ)
362 .addImm(SrcW)
363 .addImm(0)
364 .addImm(0)
365 .addImm(0)
366 .addImm(0)
367 .addImm(1)
368 .addImm(2)
369 .addImm(3)
370 .addOperand(RID)
371 .addOperand(SID)
372 .addImm(CTX)
373 .addImm(CTY)
374 .addImm(CTZ)
375 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000376 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
377 .addOperand(MI->getOperand(2))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000378 .addImm(SrcX)
379 .addImm(SrcY)
380 .addImm(SrcZ)
381 .addImm(SrcW)
382 .addImm(0)
383 .addImm(0)
384 .addImm(0)
385 .addImm(0)
386 .addImm(1)
387 .addImm(2)
388 .addImm(3)
389 .addOperand(RID)
390 .addOperand(SID)
391 .addImm(CTX)
392 .addImm(CTY)
393 .addImm(CTZ)
394 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000395 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
396 .addOperand(MI->getOperand(0))
397 .addOperand(MI->getOperand(1))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000398 .addImm(SrcX)
399 .addImm(SrcY)
400 .addImm(SrcZ)
401 .addImm(SrcW)
402 .addImm(0)
403 .addImm(0)
404 .addImm(0)
405 .addImm(0)
406 .addImm(1)
407 .addImm(2)
408 .addImm(3)
409 .addOperand(RID)
410 .addOperand(SID)
411 .addImm(CTX)
412 .addImm(CTY)
413 .addImm(CTZ)
414 .addImm(CTW)
Tom Stellard75aadc22012-12-11 21:25:42 +0000415 .addReg(T0, RegState::Implicit)
416 .addReg(T1, RegState::Implicit);
417 break;
418 }
419
420 case AMDGPU::BRANCH:
421 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000422 .addOperand(MI->getOperand(0));
Tom Stellard75aadc22012-12-11 21:25:42 +0000423 break;
424
425 case AMDGPU::BRANCH_COND_f32: {
426 MachineInstr *NewMI =
427 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
428 AMDGPU::PREDICATE_BIT)
429 .addOperand(MI->getOperand(1))
430 .addImm(OPCODE_IS_NOT_ZERO)
431 .addImm(0); // Flags
432 TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000433 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
Tom Stellard75aadc22012-12-11 21:25:42 +0000434 .addOperand(MI->getOperand(0))
435 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
436 break;
437 }
438
439 case AMDGPU::BRANCH_COND_i32: {
440 MachineInstr *NewMI =
441 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
442 AMDGPU::PREDICATE_BIT)
443 .addOperand(MI->getOperand(1))
444 .addImm(OPCODE_IS_NOT_ZERO_INT)
445 .addImm(0); // Flags
446 TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000447 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
Tom Stellard75aadc22012-12-11 21:25:42 +0000448 .addOperand(MI->getOperand(0))
449 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
450 break;
451 }
452
Tom Stellard75aadc22012-12-11 21:25:42 +0000453 case AMDGPU::EG_ExportSwz:
454 case AMDGPU::R600_ExportSwz: {
Tom Stellard6f1b8652013-01-23 21:39:49 +0000455 // Instruction is left unmodified if its not the last one of its type
456 bool isLastInstructionOfItsType = true;
457 unsigned InstExportType = MI->getOperand(1).getImm();
458 for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
459 EndBlock = BB->end(); NextExportInst != EndBlock;
460 NextExportInst = llvm::next(NextExportInst)) {
461 if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
462 NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
463 unsigned CurrentInstExportType = NextExportInst->getOperand(1)
464 .getImm();
465 if (CurrentInstExportType == InstExportType) {
466 isLastInstructionOfItsType = false;
467 break;
468 }
469 }
470 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000471 bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
Tom Stellard6f1b8652013-01-23 21:39:49 +0000472 if (!EOP && !isLastInstructionOfItsType)
Tom Stellard75aadc22012-12-11 21:25:42 +0000473 return BB;
474 unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
475 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
476 .addOperand(MI->getOperand(0))
477 .addOperand(MI->getOperand(1))
478 .addOperand(MI->getOperand(2))
479 .addOperand(MI->getOperand(3))
480 .addOperand(MI->getOperand(4))
481 .addOperand(MI->getOperand(5))
482 .addOperand(MI->getOperand(6))
483 .addImm(CfInst)
Tom Stellard6f1b8652013-01-23 21:39:49 +0000484 .addImm(EOP);
Tom Stellard75aadc22012-12-11 21:25:42 +0000485 break;
486 }
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000487 case AMDGPU::RETURN: {
488 // RETURN instructions must have the live-out registers as implicit uses,
489 // otherwise they appear dead.
490 R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
491 MachineInstrBuilder MIB(*MF, MI);
492 for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
493 MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
494 return BB;
495 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000496 }
497
498 MI->eraseFromParent();
499 return BB;
500}
501
502//===----------------------------------------------------------------------===//
503// Custom DAG Lowering Operations
504//===----------------------------------------------------------------------===//
505
Tom Stellard75aadc22012-12-11 21:25:42 +0000506SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000507 MachineFunction &MF = DAG.getMachineFunction();
508 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard75aadc22012-12-11 21:25:42 +0000509 switch (Op.getOpcode()) {
510 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000511 case ISD::FCOS:
512 case ISD::FSIN: return LowerTrig(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000513 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000514 case ISD::STORE: return LowerSTORE(Op, DAG);
Tom Stellard365366f2013-01-23 02:09:06 +0000515 case ISD::LOAD: return LowerLOAD(Op, DAG);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000516 case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000517 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000518 case ISD::INTRINSIC_VOID: {
519 SDValue Chain = Op.getOperand(0);
520 unsigned IntrinsicID =
521 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
522 switch (IntrinsicID) {
523 case AMDGPUIntrinsic::AMDGPU_store_output: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000524 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
525 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000526 MFI->LiveOuts.push_back(Reg);
Andrew Trickef9de2a2013-05-25 02:42:55 +0000527 return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
Tom Stellard75aadc22012-12-11 21:25:42 +0000528 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000529 case AMDGPUIntrinsic::R600_store_swizzle: {
530 const SDValue Args[8] = {
531 Chain,
532 Op.getOperand(2), // Export Value
533 Op.getOperand(3), // ArrayBase
534 Op.getOperand(4), // Type
535 DAG.getConstant(0, MVT::i32), // SWZ_X
536 DAG.getConstant(1, MVT::i32), // SWZ_Y
537 DAG.getConstant(2, MVT::i32), // SWZ_Z
538 DAG.getConstant(3, MVT::i32) // SWZ_W
539 };
Andrew Trickef9de2a2013-05-25 02:42:55 +0000540 return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(),
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000541 Args, 8);
Tom Stellard75aadc22012-12-11 21:25:42 +0000542 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000543
Tom Stellard75aadc22012-12-11 21:25:42 +0000544 // default for switch(IntrinsicID)
545 default: break;
546 }
547 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
548 break;
549 }
550 case ISD::INTRINSIC_WO_CHAIN: {
551 unsigned IntrinsicID =
552 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
553 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000554 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000555 switch(IntrinsicID) {
556 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
557 case AMDGPUIntrinsic::R600_load_input: {
558 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
559 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Vincent Lejeuned3fcb502013-05-17 16:51:06 +0000560 MachineFunction &MF = DAG.getMachineFunction();
561 MachineRegisterInfo &MRI = MF.getRegInfo();
562 MRI.addLiveIn(Reg);
563 return DAG.getCopyFromReg(DAG.getEntryNode(),
Andrew Trickef9de2a2013-05-25 02:42:55 +0000564 SDLoc(DAG.getEntryNode()), Reg, VT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000565 }
Tom Stellard41afe6a2013-02-05 17:09:14 +0000566
567 case AMDGPUIntrinsic::R600_interp_input: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000568 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Tom Stellard41afe6a2013-02-05 17:09:14 +0000569 int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
570 MachineSDNode *interp;
571 if (ijb < 0) {
Bill Wendling37e9adb2013-06-07 20:28:55 +0000572 const MachineFunction &MF = DAG.getMachineFunction();
573 const R600InstrInfo *TII =
574 static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
Tom Stellard41afe6a2013-02-05 17:09:14 +0000575 interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
576 MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
577 return DAG.getTargetExtractSubreg(
578 TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
579 DL, MVT::f32, SDValue(interp, 0));
580 }
581
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000582 MachineFunction &MF = DAG.getMachineFunction();
583 MachineRegisterInfo &MRI = MF.getRegInfo();
584 unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
585 unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
586 MRI.addLiveIn(RegisterI);
587 MRI.addLiveIn(RegisterJ);
588 SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
589 SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
590 SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
591 SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);
592
Tom Stellard41afe6a2013-02-05 17:09:14 +0000593 if (slot % 4 < 2)
594 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
595 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000596 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000597 else
598 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
599 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000600 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000601 return SDValue(interp, slot % 2);
Tom Stellard75aadc22012-12-11 21:25:42 +0000602 }
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000603 case AMDGPUIntrinsic::R600_tex:
604 case AMDGPUIntrinsic::R600_texc:
605 case AMDGPUIntrinsic::R600_txl:
606 case AMDGPUIntrinsic::R600_txlc:
607 case AMDGPUIntrinsic::R600_txb:
608 case AMDGPUIntrinsic::R600_txbc:
609 case AMDGPUIntrinsic::R600_txf:
610 case AMDGPUIntrinsic::R600_txq:
611 case AMDGPUIntrinsic::R600_ddx:
Vincent Lejeune6df39432013-10-02 16:00:33 +0000612 case AMDGPUIntrinsic::R600_ddy:
613 case AMDGPUIntrinsic::R600_ldptr: {
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000614 unsigned TextureOp;
615 switch (IntrinsicID) {
616 case AMDGPUIntrinsic::R600_tex:
617 TextureOp = 0;
618 break;
619 case AMDGPUIntrinsic::R600_texc:
620 TextureOp = 1;
621 break;
622 case AMDGPUIntrinsic::R600_txl:
623 TextureOp = 2;
624 break;
625 case AMDGPUIntrinsic::R600_txlc:
626 TextureOp = 3;
627 break;
628 case AMDGPUIntrinsic::R600_txb:
629 TextureOp = 4;
630 break;
631 case AMDGPUIntrinsic::R600_txbc:
632 TextureOp = 5;
633 break;
634 case AMDGPUIntrinsic::R600_txf:
635 TextureOp = 6;
636 break;
637 case AMDGPUIntrinsic::R600_txq:
638 TextureOp = 7;
639 break;
640 case AMDGPUIntrinsic::R600_ddx:
641 TextureOp = 8;
642 break;
643 case AMDGPUIntrinsic::R600_ddy:
644 TextureOp = 9;
645 break;
Vincent Lejeune6df39432013-10-02 16:00:33 +0000646 case AMDGPUIntrinsic::R600_ldptr:
647 TextureOp = 10;
648 break;
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000649 default:
650 llvm_unreachable("Unknow Texture Operation");
651 }
652
653 SDValue TexArgs[19] = {
654 DAG.getConstant(TextureOp, MVT::i32),
655 Op.getOperand(1),
656 DAG.getConstant(0, MVT::i32),
657 DAG.getConstant(1, MVT::i32),
658 DAG.getConstant(2, MVT::i32),
659 DAG.getConstant(3, MVT::i32),
660 Op.getOperand(2),
661 Op.getOperand(3),
662 Op.getOperand(4),
663 DAG.getConstant(0, MVT::i32),
664 DAG.getConstant(1, MVT::i32),
665 DAG.getConstant(2, MVT::i32),
666 DAG.getConstant(3, MVT::i32),
667 Op.getOperand(5),
668 Op.getOperand(6),
669 Op.getOperand(7),
670 Op.getOperand(8),
671 Op.getOperand(9),
672 Op.getOperand(10)
673 };
674 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
675 }
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000676 case AMDGPUIntrinsic::AMDGPU_dp4: {
677 SDValue Args[8] = {
678 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
679 DAG.getConstant(0, MVT::i32)),
680 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
681 DAG.getConstant(0, MVT::i32)),
682 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
683 DAG.getConstant(1, MVT::i32)),
684 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
685 DAG.getConstant(1, MVT::i32)),
686 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
687 DAG.getConstant(2, MVT::i32)),
688 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
689 DAG.getConstant(2, MVT::i32)),
690 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
691 DAG.getConstant(3, MVT::i32)),
692 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
693 DAG.getConstant(3, MVT::i32))
694 };
695 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8);
696 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000697
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000698 case Intrinsic::r600_read_ngroups_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000699 return LowerImplicitParameter(DAG, VT, DL, 0);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000700 case Intrinsic::r600_read_ngroups_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000701 return LowerImplicitParameter(DAG, VT, DL, 1);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000702 case Intrinsic::r600_read_ngroups_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000703 return LowerImplicitParameter(DAG, VT, DL, 2);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000704 case Intrinsic::r600_read_global_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000705 return LowerImplicitParameter(DAG, VT, DL, 3);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000706 case Intrinsic::r600_read_global_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000707 return LowerImplicitParameter(DAG, VT, DL, 4);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000708 case Intrinsic::r600_read_global_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000709 return LowerImplicitParameter(DAG, VT, DL, 5);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000710 case Intrinsic::r600_read_local_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000711 return LowerImplicitParameter(DAG, VT, DL, 6);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000712 case Intrinsic::r600_read_local_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000713 return LowerImplicitParameter(DAG, VT, DL, 7);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000714 case Intrinsic::r600_read_local_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000715 return LowerImplicitParameter(DAG, VT, DL, 8);
716
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000717 case Intrinsic::r600_read_tgid_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000718 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
719 AMDGPU::T1_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000720 case Intrinsic::r600_read_tgid_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000721 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
722 AMDGPU::T1_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000723 case Intrinsic::r600_read_tgid_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000724 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
725 AMDGPU::T1_Z, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000726 case Intrinsic::r600_read_tidig_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000727 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
728 AMDGPU::T0_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000729 case Intrinsic::r600_read_tidig_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000730 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
731 AMDGPU::T0_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000732 case Intrinsic::r600_read_tidig_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000733 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
734 AMDGPU::T0_Z, VT);
735 }
736 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
737 break;
738 }
739 } // end switch(Op.getOpcode())
740 return SDValue();
741}
742
743void R600TargetLowering::ReplaceNodeResults(SDNode *N,
744 SmallVectorImpl<SDValue> &Results,
745 SelectionDAG &DAG) const {
746 switch (N->getOpcode()) {
747 default: return;
748 case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
Tom Stellard365366f2013-01-23 02:09:06 +0000749 return;
750 case ISD::LOAD: {
751 SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
752 Results.push_back(SDValue(Node, 0));
753 Results.push_back(SDValue(Node, 1));
754 // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
755 // function
756 DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
757 return;
758 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000759 case ISD::STORE:
760 SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
761 Results.push_back(SDValue(Node, 0));
762 return;
Tom Stellard75aadc22012-12-11 21:25:42 +0000763 }
764}
765
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000766SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
767 // On hw >= R700, COS/SIN input must be between -1. and 1.
768 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
769 EVT VT = Op.getValueType();
770 SDValue Arg = Op.getOperand(0);
771 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
772 DAG.getNode(ISD::FADD, SDLoc(Op), VT,
773 DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
774 DAG.getConstantFP(0.15915494309, MVT::f32)),
775 DAG.getConstantFP(0.5, MVT::f32)));
776 unsigned TrigNode;
777 switch (Op.getOpcode()) {
778 case ISD::FCOS:
779 TrigNode = AMDGPUISD::COS_HW;
780 break;
781 case ISD::FSIN:
782 TrigNode = AMDGPUISD::SIN_HW;
783 break;
784 default:
785 llvm_unreachable("Wrong trig opcode");
786 }
787 SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
788 DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
789 DAG.getConstantFP(-0.5, MVT::f32)));
790 if (Gen >= AMDGPUSubtarget::R700)
791 return TrigVal;
792 // On R600 hw, COS/SIN input must be between -Pi and Pi.
793 return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
794 DAG.getConstantFP(3.14159265359, MVT::f32));
795}
796
Tom Stellard75aadc22012-12-11 21:25:42 +0000797SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
798 return DAG.getNode(
799 ISD::SETCC,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000800 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +0000801 MVT::i1,
802 Op, DAG.getConstantFP(0.0f, MVT::f32),
803 DAG.getCondCode(ISD::SETNE)
804 );
805}
806
Tom Stellard75aadc22012-12-11 21:25:42 +0000807SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000808 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +0000809 unsigned DwordOffset) const {
810 unsigned ByteOffset = DwordOffset * 4;
811 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +0000812 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +0000813
814 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
815 assert(isInt<16>(ByteOffset));
816
817 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
818 DAG.getConstant(ByteOffset, MVT::i32), // PTR
819 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
820 false, false, false, 0);
821}
822
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000823SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
824
825 MachineFunction &MF = DAG.getMachineFunction();
826 const AMDGPUFrameLowering *TFL =
827 static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
828
829 FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
830 assert(FIN);
831
832 unsigned FrameIndex = FIN->getIndex();
833 unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
834 return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
835}
836
Tom Stellard75aadc22012-12-11 21:25:42 +0000837bool R600TargetLowering::isZero(SDValue Op) const {
838 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
839 return Cst->isNullValue();
840 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
841 return CstFP->isZero();
842 } else {
843 return false;
844 }
845}
846
/// Custom lowering for ISD::SELECT_CC. Tries, in order:
///   1. a native SET* instruction (hardware true/false select values),
///   2. a native CND* instruction (comparison against zero),
///   3. a min/max pattern,
///   4. a two-step expansion: a supported SELECT_CC materializes the
///      condition, then a SETNE-based SELECT_CC picks True/False.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  // If True/False are swapped relative to the hardware encoding, invert the
  // condition (and, failing that, also swap the compare operands) so a legal
  // condcode is used.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
     ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        // Inverting the condition means True/False swap roles too.
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types.  This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    // CND* has no "not equal" form; rewrite x != 0 as x == 0 with the select
    // values swapped.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }


  // Possible Min/Max pattern
  SDValue MinMax = LowerMinMax(Op, DAG);
  if (MinMax.getNode()) {
    return MinMax;
  }

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  }
  else {
    // NOTE(review): with NDEBUG this assert vanishes and HWTrue/HWFalse stay
    // default-constructed below — presumably this branch is unreachable for
    // the types legalization hands us; confirm.
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
988
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000989/// LLVM generates byte-addresed pointers. For indirect addressing, we need to
990/// convert these pointers to a register index. Each register holds
991/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
992/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
993/// for indirect addressing.
994SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
995 unsigned StackWidth,
996 SelectionDAG &DAG) const {
997 unsigned SRLPad;
998 switch(StackWidth) {
999 case 1:
1000 SRLPad = 2;
1001 break;
1002 case 2:
1003 SRLPad = 3;
1004 break;
1005 case 4:
1006 SRLPad = 4;
1007 break;
1008 default: llvm_unreachable("Invalid stack width");
1009 }
1010
Andrew Trickef9de2a2013-05-25 02:42:55 +00001011 return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001012 DAG.getConstant(SRLPad, MVT::i32));
1013}
1014
1015void R600TargetLowering::getStackAddress(unsigned StackWidth,
1016 unsigned ElemIdx,
1017 unsigned &Channel,
1018 unsigned &PtrIncr) const {
1019 switch (StackWidth) {
1020 default:
1021 case 1:
1022 Channel = 0;
1023 if (ElemIdx > 0) {
1024 PtrIncr = 1;
1025 } else {
1026 PtrIncr = 0;
1027 }
1028 break;
1029 case 2:
1030 Channel = ElemIdx % 2;
1031 if (ElemIdx == 2) {
1032 PtrIncr = 1;
1033 } else {
1034 PtrIncr = 0;
1035 }
1036 break;
1037 case 4:
1038 Channel = ElemIdx;
1039 PtrIncr = 0;
1040 break;
1041 }
1042}
1043
Tom Stellard75aadc22012-12-11 21:25:42 +00001044SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001045 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001046 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1047 SDValue Chain = Op.getOperand(0);
1048 SDValue Value = Op.getOperand(1);
1049 SDValue Ptr = Op.getOperand(2);
1050
Tom Stellard2ffc3302013-08-26 15:05:44 +00001051 SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
Tom Stellardfbab8272013-08-16 01:12:11 +00001052 if (Result.getNode()) {
1053 return Result;
1054 }
1055
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001056 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
1057 if (StoreNode->isTruncatingStore()) {
1058 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001059 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001060 EVT MemVT = StoreNode->getMemoryVT();
1061 SDValue MaskConstant;
1062 if (MemVT == MVT::i8) {
1063 MaskConstant = DAG.getConstant(0xFF, MVT::i32);
1064 } else {
1065 assert(MemVT == MVT::i16);
1066 MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
1067 }
1068 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
1069 DAG.getConstant(2, MVT::i32));
1070 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
1071 DAG.getConstant(0x00000003, VT));
1072 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1073 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
1074 DAG.getConstant(3, VT));
1075 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1076 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
1077 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1078 // vector instead.
1079 SDValue Src[4] = {
1080 ShiftedValue,
1081 DAG.getConstant(0, MVT::i32),
1082 DAG.getConstant(0, MVT::i32),
1083 Mask
1084 };
1085 SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src, 4);
1086 SDValue Args[3] = { Chain, Input, DWordAddr };
1087 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
1088 Op->getVTList(), Args, 3, MemVT,
1089 StoreNode->getMemOperand());
1090 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
1091 Value.getValueType().bitsGE(MVT::i32)) {
1092 // Convert pointer from byte address to dword address.
1093 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1094 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
1095 Ptr, DAG.getConstant(2, MVT::i32)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001096
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001097 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
1098 assert(!"Truncated and indexed stores not supported yet");
1099 } else {
1100 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1101 }
1102 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001103 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001104 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001105
1106 EVT ValueVT = Value.getValueType();
1107
1108 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1109 return SDValue();
1110 }
1111
1112 // Lowering for indirect addressing
1113
1114 const MachineFunction &MF = DAG.getMachineFunction();
1115 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1116 getTargetMachine().getFrameLowering());
1117 unsigned StackWidth = TFL->getStackWidth(MF);
1118
1119 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1120
1121 if (ValueVT.isVector()) {
1122 unsigned NumElemVT = ValueVT.getVectorNumElements();
1123 EVT ElemVT = ValueVT.getVectorElementType();
1124 SDValue Stores[4];
1125
1126 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1127 "vector width in load");
1128
1129 for (unsigned i = 0; i < NumElemVT; ++i) {
1130 unsigned Channel, PtrIncr;
1131 getStackAddress(StackWidth, i, Channel, PtrIncr);
1132 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1133 DAG.getConstant(PtrIncr, MVT::i32));
1134 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
1135 Value, DAG.getConstant(i, MVT::i32));
1136
1137 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1138 Chain, Elem, Ptr,
1139 DAG.getTargetConstant(Channel, MVT::i32));
1140 }
1141 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
1142 } else {
1143 if (ValueVT == MVT::i8) {
1144 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1145 }
1146 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
NAKAMURA Takumi18ca09c2013-05-22 06:37:25 +00001147 DAG.getTargetConstant(0, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001148 }
1149
1150 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001151}
1152
Tom Stellard365366f2013-01-23 02:09:06 +00001153// return (512 + (kc_bank << 12)
1154static int
1155ConstantAddressBlock(unsigned AddressSpace) {
1156 switch (AddressSpace) {
1157 case AMDGPUAS::CONSTANT_BUFFER_0:
1158 return 512;
1159 case AMDGPUAS::CONSTANT_BUFFER_1:
1160 return 512 + 4096;
1161 case AMDGPUAS::CONSTANT_BUFFER_2:
1162 return 512 + 4096 * 2;
1163 case AMDGPUAS::CONSTANT_BUFFER_3:
1164 return 512 + 4096 * 3;
1165 case AMDGPUAS::CONSTANT_BUFFER_4:
1166 return 512 + 4096 * 4;
1167 case AMDGPUAS::CONSTANT_BUFFER_5:
1168 return 512 + 4096 * 5;
1169 case AMDGPUAS::CONSTANT_BUFFER_6:
1170 return 512 + 4096 * 6;
1171 case AMDGPUAS::CONSTANT_BUFFER_7:
1172 return 512 + 4096 * 7;
1173 case AMDGPUAS::CONSTANT_BUFFER_8:
1174 return 512 + 4096 * 8;
1175 case AMDGPUAS::CONSTANT_BUFFER_9:
1176 return 512 + 4096 * 9;
1177 case AMDGPUAS::CONSTANT_BUFFER_10:
1178 return 512 + 4096 * 10;
1179 case AMDGPUAS::CONSTANT_BUFFER_11:
1180 return 512 + 4096 * 11;
1181 case AMDGPUAS::CONSTANT_BUFFER_12:
1182 return 512 + 4096 * 12;
1183 case AMDGPUAS::CONSTANT_BUFFER_13:
1184 return 512 + 4096 * 13;
1185 case AMDGPUAS::CONSTANT_BUFFER_14:
1186 return 512 + 4096 * 14;
1187 case AMDGPUAS::CONSTANT_BUFFER_15:
1188 return 512 + 4096 * 15;
1189 default:
1190 return -1;
1191 }
1192}
1193
/// Custom lowering for ISD::LOAD:
///  - vector loads from local memory are split per element,
///  - constant-buffer loads become CONST_ADDRESS nodes (folded when the
///    pointer is constant),
///  - SEXT loads outside constant buffer 0 are expanded to EXTLOAD + shifts,
///  - private-address loads are lowered to REGISTER_LOAD nodes for
///    indirect addressing.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      SplitVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1) {
    SDValue Result;
    // A pointer known at compile time can be folded directly into the
    // CONST_ADDRESS encoding; otherwise a dynamic v4i32 fetch is emitted.
    if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
        dyn_cast<Constant>(LoadNode->getSrcValue()) ||
        dyn_cast<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT, Slots, NumElements);
    } else {
      // non constant ptr cant be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    // Scalar loads take element 0 of the fetched vector.
    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
          DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so
  // we need to manually expand loads that may be legal in some address spaces
  // and illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported
  // for compute shaders, since the data is sign extended when it is uploaded
  // to the buffer. However SEXT loads from other address spaces are not
  // supported, so we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    SDValue ShiftAmount =
          DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                  LoadNode->getPointerInfo(), MemVT,
                                  LoadNode->isVolatile(),
                                  LoadNode->isNonTemporal(),
                                  LoadNode->getAlignment());
    // Sign extend by shifting the value into the top bits and arithmetic
    // shifting it back down.
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);

    SDValue MergedValues[2] = { Sra, Chain };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    // Load each element through its own REGISTER_LOAD, padding unused
    // channels with UNDEF, then rebuild the vector.
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, MVT::i32),
                             Op.getOperand(2));
    }
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2];
  Ops[0] = LoweredLoad;
  Ops[1] = Chain;

  return DAG.getMergeValues(Ops, 2, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001328
Tom Stellard75aadc22012-12-11 21:25:42 +00001329/// XXX Only kernel functions are supported, so we can assume for now that
1330/// every function is a kernel function, but in the future we should use
1331/// separate calling conventions for kernel and non-kernel functions.
1332SDValue R600TargetLowering::LowerFormalArguments(
1333 SDValue Chain,
1334 CallingConv::ID CallConv,
1335 bool isVarArg,
1336 const SmallVectorImpl<ISD::InputArg> &Ins,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001337 SDLoc DL, SelectionDAG &DAG,
Tom Stellard75aadc22012-12-11 21:25:42 +00001338 SmallVectorImpl<SDValue> &InVals) const {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001339 SmallVector<CCValAssign, 16> ArgLocs;
1340 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
1341 getTargetMachine(), ArgLocs, *DAG.getContext());
1342
1343 AnalyzeFormalArguments(CCInfo, Ins);
1344
Tom Stellard1e803092013-07-23 01:48:18 +00001345 for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001346 CCValAssign &VA = ArgLocs[i];
1347 EVT VT = VA.getLocVT();
Tom Stellard78e01292013-07-23 01:47:58 +00001348
Tom Stellard75aadc22012-12-11 21:25:42 +00001349 PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +00001350 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellardacfeebf2013-07-23 01:48:05 +00001351
1352 // The first 36 bytes of the input buffer contains information about
1353 // thread group and global sizes.
Tom Stellard1e803092013-07-23 01:48:18 +00001354 SDValue Arg = DAG.getLoad(VT, DL, Chain,
Tom Stellardacfeebf2013-07-23 01:48:05 +00001355 DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
Tom Stellard1e803092013-07-23 01:48:18 +00001356 MachinePointerInfo(UndefValue::get(PtrTy)), false,
1357 false, false, 4); // 4 is the prefered alignment for
1358 // the CONSTANT memory space.
Tom Stellard75aadc22012-12-11 21:25:42 +00001359 InVals.push_back(Arg);
Tom Stellard75aadc22012-12-11 21:25:42 +00001360 }
1361 return Chain;
1362}
1363
Matt Arsenault758659232013-05-18 00:21:46 +00001364EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Tom Stellard75aadc22012-12-11 21:25:42 +00001365 if (!VT.isVector()) return MVT::i32;
1366 return VT.changeVectorElementTypeToInteger();
1367}
1368
Benjamin Kramer193960c2013-06-11 13:32:25 +00001369static SDValue
1370CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
1371 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001372 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1373 assert(RemapSwizzle.empty());
1374 SDValue NewBldVec[4] = {
1375 VectorEntry.getOperand(0),
1376 VectorEntry.getOperand(1),
1377 VectorEntry.getOperand(2),
1378 VectorEntry.getOperand(3)
1379 };
1380
1381 for (unsigned i = 0; i < 4; i++) {
1382 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1383 if (C->isZero()) {
1384 RemapSwizzle[i] = 4; // SEL_0
1385 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1386 } else if (C->isExactlyValue(1.0)) {
1387 RemapSwizzle[i] = 5; // SEL_1
1388 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1389 }
1390 }
1391
1392 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1393 continue;
1394 for (unsigned j = 0; j < i; j++) {
1395 if (NewBldVec[i] == NewBldVec[j]) {
1396 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1397 RemapSwizzle[i] = j;
1398 break;
1399 }
1400 }
1401 }
1402
1403 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
1404 VectorEntry.getValueType(), NewBldVec, 4);
1405}
1406
Benjamin Kramer193960c2013-06-11 13:32:25 +00001407static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1408 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001409 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1410 assert(RemapSwizzle.empty());
1411 SDValue NewBldVec[4] = {
1412 VectorEntry.getOperand(0),
1413 VectorEntry.getOperand(1),
1414 VectorEntry.getOperand(2),
1415 VectorEntry.getOperand(3)
1416 };
1417 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001418 for (unsigned i = 0; i < 4; i++)
1419 RemapSwizzle[i] = i;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001420
1421 for (unsigned i = 0; i < 4; i++) {
1422 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1423 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1424 ->getZExtValue();
1425 if (!isUnmovable[Idx]) {
1426 // Swap i and Idx
1427 std::swap(NewBldVec[Idx], NewBldVec[i]);
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001428 std::swap(RemapSwizzle[RemapSwizzle[Idx]], RemapSwizzle[RemapSwizzle[i]]);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001429 }
1430 isUnmovable[Idx] = true;
1431 }
1432 }
1433
1434 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
1435 VectorEntry.getValueType(), NewBldVec, 4);
1436}
1437
1438
1439SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1440SDValue Swz[4], SelectionDAG &DAG) const {
1441 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1442 // Old -> New swizzle values
1443 DenseMap<unsigned, unsigned> SwizzleRemap;
1444
1445 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1446 for (unsigned i = 0; i < 4; i++) {
1447 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1448 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1449 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1450 }
1451
1452 SwizzleRemap.clear();
1453 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1454 for (unsigned i = 0; i < 4; i++) {
1455 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1456 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1457 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1458 }
1459
1460 return BuildVector;
1461}
1462
1463
Tom Stellard75aadc22012-12-11 21:25:42 +00001464//===----------------------------------------------------------------------===//
1465// Custom DAG Optimizations
1466//===----------------------------------------------------------------------===//
1467
1468SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1469 DAGCombinerInfo &DCI) const {
1470 SelectionDAG &DAG = DCI.DAG;
1471
1472 switch (N->getOpcode()) {
1473 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1474 case ISD::FP_ROUND: {
1475 SDValue Arg = N->getOperand(0);
1476 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001477 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001478 Arg.getOperand(0));
1479 }
1480 break;
1481 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001482
1483 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1484 // (i32 select_cc f32, f32, -1, 0 cc)
1485 //
1486 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1487 // this to one of the SET*_DX10 instructions.
1488 case ISD::FP_TO_SINT: {
1489 SDValue FNeg = N->getOperand(0);
1490 if (FNeg.getOpcode() != ISD::FNEG) {
1491 return SDValue();
1492 }
1493 SDValue SelectCC = FNeg.getOperand(0);
1494 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1495 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1496 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1497 !isHWTrueValue(SelectCC.getOperand(2)) ||
1498 !isHWFalseValue(SelectCC.getOperand(3))) {
1499 return SDValue();
1500 }
1501
Andrew Trickef9de2a2013-05-25 02:42:55 +00001502 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001503 SelectCC.getOperand(0), // LHS
1504 SelectCC.getOperand(1), // RHS
1505 DAG.getConstant(-1, MVT::i32), // True
1506 DAG.getConstant(0, MVT::i32), // Flase
1507 SelectCC.getOperand(4)); // CC
1508
1509 break;
1510 }
Quentin Colombete2e05482013-07-30 00:27:16 +00001511
1512 // insert_vector_elt (build_vector elt0, …, eltN), NewEltIdx, idx
1513 // => build_vector elt0, …, NewEltIdx, …, eltN
1514 case ISD::INSERT_VECTOR_ELT: {
1515 SDValue InVec = N->getOperand(0);
1516 SDValue InVal = N->getOperand(1);
1517 SDValue EltNo = N->getOperand(2);
1518 SDLoc dl(N);
1519
1520 // If the inserted element is an UNDEF, just use the input vector.
1521 if (InVal.getOpcode() == ISD::UNDEF)
1522 return InVec;
1523
1524 EVT VT = InVec.getValueType();
1525
1526 // If we can't generate a legal BUILD_VECTOR, exit
1527 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1528 return SDValue();
1529
1530 // Check that we know which element is being inserted
1531 if (!isa<ConstantSDNode>(EltNo))
1532 return SDValue();
1533 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1534
1535 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1536 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1537 // vector elements.
1538 SmallVector<SDValue, 8> Ops;
1539 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1540 Ops.append(InVec.getNode()->op_begin(),
1541 InVec.getNode()->op_end());
1542 } else if (InVec.getOpcode() == ISD::UNDEF) {
1543 unsigned NElts = VT.getVectorNumElements();
1544 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1545 } else {
1546 return SDValue();
1547 }
1548
1549 // Insert the element
1550 if (Elt < Ops.size()) {
1551 // All the operands of BUILD_VECTOR must have the same type;
1552 // we enforce that here.
1553 EVT OpVT = Ops[0].getValueType();
1554 if (InVal.getValueType() != OpVT)
1555 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1556 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
1557 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
1558 Ops[Elt] = InVal;
1559 }
1560
1561 // Return the new vector
1562 return DAG.getNode(ISD::BUILD_VECTOR, dl,
1563 VT, &Ops[0], Ops.size());
1564 }
1565
Tom Stellard365366f2013-01-23 02:09:06 +00001566 // Extract_vec (Build_vector) generated by custom lowering
1567 // also needs to be customly combined
1568 case ISD::EXTRACT_VECTOR_ELT: {
1569 SDValue Arg = N->getOperand(0);
1570 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1571 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1572 unsigned Element = Const->getZExtValue();
1573 return Arg->getOperand(Element);
1574 }
1575 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001576 if (Arg.getOpcode() == ISD::BITCAST &&
1577 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1578 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1579 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001580 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00001581 Arg->getOperand(0).getOperand(Element));
1582 }
1583 }
Tom Stellard365366f2013-01-23 02:09:06 +00001584 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001585
1586 case ISD::SELECT_CC: {
1587 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1588 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001589 //
1590 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1591 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001592 SDValue LHS = N->getOperand(0);
1593 if (LHS.getOpcode() != ISD::SELECT_CC) {
1594 return SDValue();
1595 }
1596
1597 SDValue RHS = N->getOperand(1);
1598 SDValue True = N->getOperand(2);
1599 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001600 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001601
1602 if (LHS.getOperand(2).getNode() != True.getNode() ||
1603 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00001604 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00001605 return SDValue();
1606 }
1607
Tom Stellard5e524892013-03-08 15:37:11 +00001608 switch (NCC) {
1609 default: return SDValue();
1610 case ISD::SETNE: return LHS;
1611 case ISD::SETEQ: {
1612 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1613 LHSCC = ISD::getSetCCInverse(LHSCC,
1614 LHS.getOperand(0).getValueType().isInteger());
Tom Stellardcd428182013-09-28 02:50:38 +00001615 if (DCI.isBeforeLegalizeOps() ||
1616 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1617 return DAG.getSelectCC(SDLoc(N),
1618 LHS.getOperand(0),
1619 LHS.getOperand(1),
1620 LHS.getOperand(2),
1621 LHS.getOperand(3),
1622 LHSCC);
1623 break;
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001624 }
Tom Stellard5e524892013-03-08 15:37:11 +00001625 }
Tom Stellardcd428182013-09-28 02:50:38 +00001626 return SDValue();
Tom Stellard5e524892013-03-08 15:37:11 +00001627 }
Tom Stellardfbab8272013-08-16 01:12:11 +00001628
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001629 case AMDGPUISD::EXPORT: {
1630 SDValue Arg = N->getOperand(1);
1631 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1632 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001633
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001634 SDValue NewArgs[8] = {
1635 N->getOperand(0), // Chain
1636 SDValue(),
1637 N->getOperand(2), // ArrayBase
1638 N->getOperand(3), // Type
1639 N->getOperand(4), // SWZ_X
1640 N->getOperand(5), // SWZ_Y
1641 N->getOperand(6), // SWZ_Z
1642 N->getOperand(7) // SWZ_W
1643 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00001644 SDLoc DL(N);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001645 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001646 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
Tom Stellarde06163a2013-02-07 14:02:35 +00001647 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001648 case AMDGPUISD::TEXTURE_FETCH: {
1649 SDValue Arg = N->getOperand(1);
1650 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1651 break;
1652
1653 SDValue NewArgs[19] = {
1654 N->getOperand(0),
1655 N->getOperand(1),
1656 N->getOperand(2),
1657 N->getOperand(3),
1658 N->getOperand(4),
1659 N->getOperand(5),
1660 N->getOperand(6),
1661 N->getOperand(7),
1662 N->getOperand(8),
1663 N->getOperand(9),
1664 N->getOperand(10),
1665 N->getOperand(11),
1666 N->getOperand(12),
1667 N->getOperand(13),
1668 N->getOperand(14),
1669 N->getOperand(15),
1670 N->getOperand(16),
1671 N->getOperand(17),
1672 N->getOperand(18),
1673 };
1674 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
1675 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
1676 NewArgs, 19);
1677 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001678 }
1679 return SDValue();
1680}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001681
1682static bool
1683FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001684 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001685 const R600InstrInfo *TII =
1686 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
1687 if (!Src.isMachineOpcode())
1688 return false;
1689 switch (Src.getMachineOpcode()) {
1690 case AMDGPU::FNEG_R600:
1691 if (!Neg.getNode())
1692 return false;
1693 Src = Src.getOperand(0);
1694 Neg = DAG.getTargetConstant(1, MVT::i32);
1695 return true;
1696 case AMDGPU::FABS_R600:
1697 if (!Abs.getNode())
1698 return false;
1699 Src = Src.getOperand(0);
1700 Abs = DAG.getTargetConstant(1, MVT::i32);
1701 return true;
1702 case AMDGPU::CONST_COPY: {
1703 unsigned Opcode = ParentNode->getMachineOpcode();
1704 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1705
1706 if (!Sel.getNode())
1707 return false;
1708
1709 SDValue CstOffset = Src.getOperand(0);
1710 if (ParentNode->getValueType(0).isVector())
1711 return false;
1712
1713 // Gather constants values
1714 int SrcIndices[] = {
1715 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
1716 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
1717 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
1718 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1719 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1720 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1721 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
1722 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
1723 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
1724 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
1725 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
1726 };
1727 std::vector<unsigned> Consts;
1728 for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
1729 int OtherSrcIdx = SrcIndices[i];
1730 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
1731 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
1732 continue;
1733 if (HasDst) {
1734 OtherSrcIdx--;
1735 OtherSelIdx--;
1736 }
1737 if (RegisterSDNode *Reg =
1738 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
1739 if (Reg->getReg() == AMDGPU::ALU_CONST) {
1740 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(
1741 ParentNode->getOperand(OtherSelIdx));
1742 Consts.push_back(Cst->getZExtValue());
1743 }
1744 }
1745 }
1746
1747 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
1748 Consts.push_back(Cst->getZExtValue());
1749 if (!TII->fitsConstReadLimitations(Consts)) {
1750 return false;
1751 }
1752
1753 Sel = CstOffset;
1754 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
1755 return true;
1756 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001757 case AMDGPU::MOV_IMM_I32:
1758 case AMDGPU::MOV_IMM_F32: {
1759 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
1760 uint64_t ImmValue = 0;
1761
1762
1763 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
1764 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
1765 float FloatValue = FPC->getValueAPF().convertToFloat();
1766 if (FloatValue == 0.0) {
1767 ImmReg = AMDGPU::ZERO;
1768 } else if (FloatValue == 0.5) {
1769 ImmReg = AMDGPU::HALF;
1770 } else if (FloatValue == 1.0) {
1771 ImmReg = AMDGPU::ONE;
1772 } else {
1773 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
1774 }
1775 } else {
1776 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
1777 uint64_t Value = C->getZExtValue();
1778 if (Value == 0) {
1779 ImmReg = AMDGPU::ZERO;
1780 } else if (Value == 1) {
1781 ImmReg = AMDGPU::ONE_INT;
1782 } else {
1783 ImmValue = Value;
1784 }
1785 }
1786
1787 // Check that we aren't already using an immediate.
1788 // XXX: It's possible for an instruction to have more than one
1789 // immediate operand, but this is not supported yet.
1790 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
1791 if (!Imm.getNode())
1792 return false;
1793 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
1794 assert(C);
1795 if (C->getZExtValue())
1796 return false;
1797 Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
1798 }
1799 Src = DAG.getRegister(ImmReg, MVT::i32);
1800 return true;
1801 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001802 default:
1803 return false;
1804 }
1805}
1806
1807
1808/// \brief Fold the instructions after selecting them
1809SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
1810 SelectionDAG &DAG) const {
1811 const R600InstrInfo *TII =
1812 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
1813 if (!Node->isMachineOpcode())
1814 return Node;
1815 unsigned Opcode = Node->getMachineOpcode();
1816 SDValue FakeOp;
1817
1818 std::vector<SDValue> Ops;
1819 for(SDNode::op_iterator I = Node->op_begin(), E = Node->op_end();
1820 I != E; ++I)
1821 Ops.push_back(*I);
1822
1823 if (Opcode == AMDGPU::DOT_4) {
1824 int OperandIdx[] = {
1825 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1826 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1827 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1828 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
1829 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
1830 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
1831 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
1832 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
1833 };
1834 int NegIdx[] = {
1835 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
1836 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
1837 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
1838 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
1839 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
1840 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
1841 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
1842 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
1843 };
1844 int AbsIdx[] = {
1845 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
1846 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
1847 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
1848 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
1849 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
1850 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
1851 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
1852 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
1853 };
1854 for (unsigned i = 0; i < 8; i++) {
1855 if (OperandIdx[i] < 0)
1856 return Node;
1857 SDValue &Src = Ops[OperandIdx[i] - 1];
1858 SDValue &Neg = Ops[NegIdx[i] - 1];
1859 SDValue &Abs = Ops[AbsIdx[i] - 1];
1860 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1861 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
1862 if (HasDst)
1863 SelIdx--;
1864 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001865 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
1866 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1867 }
1868 } else if (Opcode == AMDGPU::REG_SEQUENCE) {
1869 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
1870 SDValue &Src = Ops[i];
1871 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001872 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1873 }
Vincent Lejeune0167a312013-09-12 23:45:00 +00001874 } else if (Opcode == AMDGPU::CLAMP_R600) {
1875 SDValue Src = Node->getOperand(0);
1876 if (!Src.isMachineOpcode() ||
1877 !TII->hasInstrModifiers(Src.getMachineOpcode()))
1878 return Node;
1879 int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
1880 AMDGPU::OpName::clamp);
1881 if (ClampIdx < 0)
1882 return Node;
1883 std::vector<SDValue> Ops;
1884 unsigned NumOp = Src.getNumOperands();
1885 for(unsigned i = 0; i < NumOp; ++i)
1886 Ops.push_back(Src.getOperand(i));
1887 Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
1888 return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
1889 Node->getVTList(), Ops);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001890 } else {
1891 if (!TII->hasInstrModifiers(Opcode))
1892 return Node;
1893 int OperandIdx[] = {
1894 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
1895 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
1896 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
1897 };
1898 int NegIdx[] = {
1899 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
1900 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
1901 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
1902 };
1903 int AbsIdx[] = {
1904 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
1905 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
1906 -1
1907 };
1908 for (unsigned i = 0; i < 3; i++) {
1909 if (OperandIdx[i] < 0)
1910 return Node;
1911 SDValue &Src = Ops[OperandIdx[i] - 1];
1912 SDValue &Neg = Ops[NegIdx[i] - 1];
1913 SDValue FakeAbs;
1914 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
1915 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1916 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001917 int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
1918 if (HasDst) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001919 SelIdx--;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001920 ImmIdx--;
1921 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001922 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001923 SDValue &Imm = Ops[ImmIdx];
1924 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001925 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1926 }
1927 }
1928
1929 return Node;
1930}