//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
16#include "R600Defines.h"
17#include "R600InstrInfo.h"
18#include "R600MachineFunctionInfo.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000019#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000020#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000021#include "llvm/CodeGen/MachineInstrBuilder.h"
22#include "llvm/CodeGen/MachineRegisterInfo.h"
23#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000024#include "llvm/IR/Argument.h"
25#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000026
27using namespace llvm;
28
29R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000030 AMDGPUTargetLowering(TM),
31 Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000032 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
33 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
34 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
35 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000036 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
37 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
38
Tom Stellard75aadc22012-12-11 21:25:42 +000039 computeRegisterProperties();
40
Tom Stellard0351ea22013-09-28 02:50:50 +000041 // Set condition code actions
42 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
43 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000044 setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000045 setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000046 setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
47 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000048 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
49 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
50 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
51 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000052 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
53 setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
54
55 setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
56 setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
57 setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
58 setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
59
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000060 setOperationAction(ISD::FCOS, MVT::f32, Custom);
61 setOperationAction(ISD::FSIN, MVT::f32, Custom);
62
Tom Stellard75aadc22012-12-11 21:25:42 +000063 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000064 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000065
Tom Stellard492ebea2013-03-08 15:37:07 +000066 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
67 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000068
69 setOperationAction(ISD::FSUB, MVT::f32, Expand);
70
71 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
72 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
73 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000074
Tom Stellard75aadc22012-12-11 21:25:42 +000075 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
76 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
77
Tom Stellarde8f9f282013-03-08 15:37:05 +000078 setOperationAction(ISD::SETCC, MVT::i32, Expand);
79 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000080 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
81
Tom Stellard53f2f902013-09-05 18:38:03 +000082 setOperationAction(ISD::SELECT, MVT::i32, Expand);
83 setOperationAction(ISD::SELECT, MVT::f32, Expand);
84 setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
85 setOperationAction(ISD::SELECT, MVT::v2f32, Expand);
86 setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
87 setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000088
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000089 // Legalize loads and stores to the private address space.
90 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +000091 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000092 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellard1e803092013-07-23 01:48:18 +000093 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
94 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
95 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
96 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000097 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000098 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +000099 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000100 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
Tom Stellardd3ee8c12013-08-16 01:12:06 +0000101 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
102 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000103
Tom Stellard365366f2013-01-23 02:09:06 +0000104 setOperationAction(ISD::LOAD, MVT::i32, Custom);
105 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000106 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
107
Tom Stellard75aadc22012-12-11 21:25:42 +0000108 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +0000109 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +0000110 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +0000111 setTargetDAGCombine(ISD::SELECT_CC);
Quentin Colombete2e05482013-07-30 00:27:16 +0000112 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000113
Michel Danzer49812b52013-07-10 16:37:07 +0000114 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
115
Tom Stellardb852af52013-03-08 15:37:03 +0000116 setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard87047f62013-04-24 23:56:18 +0000117 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
Tom Stellardfc455472013-08-12 22:33:21 +0000118 setSchedulingPreference(Sched::Source);
Tom Stellard75aadc22012-12-11 21:25:42 +0000119}
120
121MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
122 MachineInstr * MI, MachineBasicBlock * BB) const {
123 MachineFunction * MF = BB->getParent();
124 MachineRegisterInfo &MRI = MF->getRegInfo();
125 MachineBasicBlock::iterator I = *MI;
Bill Wendling37e9adb2013-06-07 20:28:55 +0000126 const R600InstrInfo *TII =
127 static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());
Tom Stellard75aadc22012-12-11 21:25:42 +0000128
129 switch (MI->getOpcode()) {
Tom Stellardc6f4a292013-08-26 15:05:59 +0000130 default:
Tom Stellard13c68ef2013-09-05 18:38:09 +0000131 if (TII->isLDSInstr(MI->getOpcode()) &&
132 TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst) != -1) {
133 int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
134 assert(DstIdx != -1);
135 MachineInstrBuilder NewMI;
136 if (!MRI.use_empty(MI->getOperand(DstIdx).getReg())) {
137 NewMI = BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()),
138 AMDGPU::OQAP);
139 TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV,
140 MI->getOperand(0).getReg(),
141 AMDGPU::OQAP);
142 } else {
143 NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
144 TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
145 }
Tom Stellardc6f4a292013-08-26 15:05:59 +0000146 for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
147 NewMI.addOperand(MI->getOperand(i));
148 }
Tom Stellardc6f4a292013-08-26 15:05:59 +0000149 } else {
150 return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
151 }
152 break;
Tom Stellard75aadc22012-12-11 21:25:42 +0000153 case AMDGPU::CLAMP_R600: {
154 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
155 AMDGPU::MOV,
156 MI->getOperand(0).getReg(),
157 MI->getOperand(1).getReg());
158 TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
159 break;
160 }
161
162 case AMDGPU::FABS_R600: {
163 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
164 AMDGPU::MOV,
165 MI->getOperand(0).getReg(),
166 MI->getOperand(1).getReg());
167 TII->addFlag(NewMI, 0, MO_FLAG_ABS);
168 break;
169 }
170
171 case AMDGPU::FNEG_R600: {
172 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
173 AMDGPU::MOV,
174 MI->getOperand(0).getReg(),
175 MI->getOperand(1).getReg());
176 TII->addFlag(NewMI, 0, MO_FLAG_NEG);
177 break;
178 }
179
Tom Stellard75aadc22012-12-11 21:25:42 +0000180 case AMDGPU::MASK_WRITE: {
181 unsigned maskedRegister = MI->getOperand(0).getReg();
182 assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
183 MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
184 TII->addFlag(defInstr, 0, MO_FLAG_MASK);
185 break;
186 }
187
188 case AMDGPU::MOV_IMM_F32:
189 TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
190 MI->getOperand(1).getFPImm()->getValueAPF()
191 .bitcastToAPInt().getZExtValue());
192 break;
193 case AMDGPU::MOV_IMM_I32:
194 TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
195 MI->getOperand(1).getImm());
196 break;
Vincent Lejeune0b72f102013-03-05 15:04:55 +0000197 case AMDGPU::CONST_COPY: {
198 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
199 MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
Tom Stellard02661d92013-06-25 21:22:18 +0000200 TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
Vincent Lejeune0b72f102013-03-05 15:04:55 +0000201 MI->getOperand(1).getImm());
202 break;
203 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000204
205 case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
Tom Stellard0344cdf2013-08-01 15:23:42 +0000206 case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
Tom Stellard75aadc22012-12-11 21:25:42 +0000207 case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
208 unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
209
210 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
211 .addOperand(MI->getOperand(0))
212 .addOperand(MI->getOperand(1))
213 .addImm(EOP); // Set End of program bit
214 break;
215 }
216
Tom Stellard75aadc22012-12-11 21:25:42 +0000217 case AMDGPU::TXD: {
218 unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
219 unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000220 MachineOperand &RID = MI->getOperand(4);
221 MachineOperand &SID = MI->getOperand(5);
222 unsigned TextureId = MI->getOperand(6).getImm();
223 unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
224 unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
Tom Stellard75aadc22012-12-11 21:25:42 +0000225
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000226 switch (TextureId) {
227 case 5: // Rect
228 CTX = CTY = 0;
229 break;
230 case 6: // Shadow1D
231 SrcW = SrcZ;
232 break;
233 case 7: // Shadow2D
234 SrcW = SrcZ;
235 break;
236 case 8: // ShadowRect
237 CTX = CTY = 0;
238 SrcW = SrcZ;
239 break;
240 case 9: // 1DArray
241 SrcZ = SrcY;
242 CTZ = 0;
243 break;
244 case 10: // 2DArray
245 CTZ = 0;
246 break;
247 case 11: // Shadow1DArray
248 SrcZ = SrcY;
249 CTZ = 0;
250 break;
251 case 12: // Shadow2DArray
252 CTZ = 0;
253 break;
254 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000255 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
256 .addOperand(MI->getOperand(3))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000257 .addImm(SrcX)
258 .addImm(SrcY)
259 .addImm(SrcZ)
260 .addImm(SrcW)
261 .addImm(0)
262 .addImm(0)
263 .addImm(0)
264 .addImm(0)
265 .addImm(1)
266 .addImm(2)
267 .addImm(3)
268 .addOperand(RID)
269 .addOperand(SID)
270 .addImm(CTX)
271 .addImm(CTY)
272 .addImm(CTZ)
273 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000274 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
275 .addOperand(MI->getOperand(2))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000276 .addImm(SrcX)
277 .addImm(SrcY)
278 .addImm(SrcZ)
279 .addImm(SrcW)
280 .addImm(0)
281 .addImm(0)
282 .addImm(0)
283 .addImm(0)
284 .addImm(1)
285 .addImm(2)
286 .addImm(3)
287 .addOperand(RID)
288 .addOperand(SID)
289 .addImm(CTX)
290 .addImm(CTY)
291 .addImm(CTZ)
292 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000293 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
294 .addOperand(MI->getOperand(0))
295 .addOperand(MI->getOperand(1))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000296 .addImm(SrcX)
297 .addImm(SrcY)
298 .addImm(SrcZ)
299 .addImm(SrcW)
300 .addImm(0)
301 .addImm(0)
302 .addImm(0)
303 .addImm(0)
304 .addImm(1)
305 .addImm(2)
306 .addImm(3)
307 .addOperand(RID)
308 .addOperand(SID)
309 .addImm(CTX)
310 .addImm(CTY)
311 .addImm(CTZ)
312 .addImm(CTW)
Tom Stellard75aadc22012-12-11 21:25:42 +0000313 .addReg(T0, RegState::Implicit)
314 .addReg(T1, RegState::Implicit);
315 break;
316 }
317
318 case AMDGPU::TXD_SHADOW: {
319 unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
320 unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000321 MachineOperand &RID = MI->getOperand(4);
322 MachineOperand &SID = MI->getOperand(5);
323 unsigned TextureId = MI->getOperand(6).getImm();
324 unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
325 unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
326
327 switch (TextureId) {
328 case 5: // Rect
329 CTX = CTY = 0;
330 break;
331 case 6: // Shadow1D
332 SrcW = SrcZ;
333 break;
334 case 7: // Shadow2D
335 SrcW = SrcZ;
336 break;
337 case 8: // ShadowRect
338 CTX = CTY = 0;
339 SrcW = SrcZ;
340 break;
341 case 9: // 1DArray
342 SrcZ = SrcY;
343 CTZ = 0;
344 break;
345 case 10: // 2DArray
346 CTZ = 0;
347 break;
348 case 11: // Shadow1DArray
349 SrcZ = SrcY;
350 CTZ = 0;
351 break;
352 case 12: // Shadow2DArray
353 CTZ = 0;
354 break;
355 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000356
357 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
358 .addOperand(MI->getOperand(3))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000359 .addImm(SrcX)
360 .addImm(SrcY)
361 .addImm(SrcZ)
362 .addImm(SrcW)
363 .addImm(0)
364 .addImm(0)
365 .addImm(0)
366 .addImm(0)
367 .addImm(1)
368 .addImm(2)
369 .addImm(3)
370 .addOperand(RID)
371 .addOperand(SID)
372 .addImm(CTX)
373 .addImm(CTY)
374 .addImm(CTZ)
375 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000376 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
377 .addOperand(MI->getOperand(2))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000378 .addImm(SrcX)
379 .addImm(SrcY)
380 .addImm(SrcZ)
381 .addImm(SrcW)
382 .addImm(0)
383 .addImm(0)
384 .addImm(0)
385 .addImm(0)
386 .addImm(1)
387 .addImm(2)
388 .addImm(3)
389 .addOperand(RID)
390 .addOperand(SID)
391 .addImm(CTX)
392 .addImm(CTY)
393 .addImm(CTZ)
394 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000395 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
396 .addOperand(MI->getOperand(0))
397 .addOperand(MI->getOperand(1))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000398 .addImm(SrcX)
399 .addImm(SrcY)
400 .addImm(SrcZ)
401 .addImm(SrcW)
402 .addImm(0)
403 .addImm(0)
404 .addImm(0)
405 .addImm(0)
406 .addImm(1)
407 .addImm(2)
408 .addImm(3)
409 .addOperand(RID)
410 .addOperand(SID)
411 .addImm(CTX)
412 .addImm(CTY)
413 .addImm(CTZ)
414 .addImm(CTW)
Tom Stellard75aadc22012-12-11 21:25:42 +0000415 .addReg(T0, RegState::Implicit)
416 .addReg(T1, RegState::Implicit);
417 break;
418 }
419
420 case AMDGPU::BRANCH:
421 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000422 .addOperand(MI->getOperand(0));
Tom Stellard75aadc22012-12-11 21:25:42 +0000423 break;
424
425 case AMDGPU::BRANCH_COND_f32: {
426 MachineInstr *NewMI =
427 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
428 AMDGPU::PREDICATE_BIT)
429 .addOperand(MI->getOperand(1))
430 .addImm(OPCODE_IS_NOT_ZERO)
431 .addImm(0); // Flags
432 TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000433 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
Tom Stellard75aadc22012-12-11 21:25:42 +0000434 .addOperand(MI->getOperand(0))
435 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
436 break;
437 }
438
439 case AMDGPU::BRANCH_COND_i32: {
440 MachineInstr *NewMI =
441 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
442 AMDGPU::PREDICATE_BIT)
443 .addOperand(MI->getOperand(1))
444 .addImm(OPCODE_IS_NOT_ZERO_INT)
445 .addImm(0); // Flags
446 TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000447 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
Tom Stellard75aadc22012-12-11 21:25:42 +0000448 .addOperand(MI->getOperand(0))
449 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
450 break;
451 }
452
Tom Stellard75aadc22012-12-11 21:25:42 +0000453 case AMDGPU::EG_ExportSwz:
454 case AMDGPU::R600_ExportSwz: {
Tom Stellard6f1b8652013-01-23 21:39:49 +0000455 // Instruction is left unmodified if its not the last one of its type
456 bool isLastInstructionOfItsType = true;
457 unsigned InstExportType = MI->getOperand(1).getImm();
458 for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
459 EndBlock = BB->end(); NextExportInst != EndBlock;
460 NextExportInst = llvm::next(NextExportInst)) {
461 if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
462 NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
463 unsigned CurrentInstExportType = NextExportInst->getOperand(1)
464 .getImm();
465 if (CurrentInstExportType == InstExportType) {
466 isLastInstructionOfItsType = false;
467 break;
468 }
469 }
470 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000471 bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
Tom Stellard6f1b8652013-01-23 21:39:49 +0000472 if (!EOP && !isLastInstructionOfItsType)
Tom Stellard75aadc22012-12-11 21:25:42 +0000473 return BB;
474 unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
475 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
476 .addOperand(MI->getOperand(0))
477 .addOperand(MI->getOperand(1))
478 .addOperand(MI->getOperand(2))
479 .addOperand(MI->getOperand(3))
480 .addOperand(MI->getOperand(4))
481 .addOperand(MI->getOperand(5))
482 .addOperand(MI->getOperand(6))
483 .addImm(CfInst)
Tom Stellard6f1b8652013-01-23 21:39:49 +0000484 .addImm(EOP);
Tom Stellard75aadc22012-12-11 21:25:42 +0000485 break;
486 }
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000487 case AMDGPU::RETURN: {
488 // RETURN instructions must have the live-out registers as implicit uses,
489 // otherwise they appear dead.
490 R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
491 MachineInstrBuilder MIB(*MF, MI);
492 for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
493 MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
494 return BB;
495 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000496 }
497
498 MI->eraseFromParent();
499 return BB;
500}
501
502//===----------------------------------------------------------------------===//
503// Custom DAG Lowering Operations
504//===----------------------------------------------------------------------===//
505
Tom Stellard75aadc22012-12-11 21:25:42 +0000506SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000507 MachineFunction &MF = DAG.getMachineFunction();
508 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard75aadc22012-12-11 21:25:42 +0000509 switch (Op.getOpcode()) {
510 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000511 case ISD::FCOS:
512 case ISD::FSIN: return LowerTrig(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000513 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000514 case ISD::STORE: return LowerSTORE(Op, DAG);
Tom Stellard365366f2013-01-23 02:09:06 +0000515 case ISD::LOAD: return LowerLOAD(Op, DAG);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000516 case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000517 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000518 case ISD::INTRINSIC_VOID: {
519 SDValue Chain = Op.getOperand(0);
520 unsigned IntrinsicID =
521 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
522 switch (IntrinsicID) {
523 case AMDGPUIntrinsic::AMDGPU_store_output: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000524 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
525 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000526 MFI->LiveOuts.push_back(Reg);
Andrew Trickef9de2a2013-05-25 02:42:55 +0000527 return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
Tom Stellard75aadc22012-12-11 21:25:42 +0000528 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000529 case AMDGPUIntrinsic::R600_store_swizzle: {
530 const SDValue Args[8] = {
531 Chain,
532 Op.getOperand(2), // Export Value
533 Op.getOperand(3), // ArrayBase
534 Op.getOperand(4), // Type
535 DAG.getConstant(0, MVT::i32), // SWZ_X
536 DAG.getConstant(1, MVT::i32), // SWZ_Y
537 DAG.getConstant(2, MVT::i32), // SWZ_Z
538 DAG.getConstant(3, MVT::i32) // SWZ_W
539 };
Andrew Trickef9de2a2013-05-25 02:42:55 +0000540 return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(),
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000541 Args, 8);
Tom Stellard75aadc22012-12-11 21:25:42 +0000542 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000543
Tom Stellard75aadc22012-12-11 21:25:42 +0000544 // default for switch(IntrinsicID)
545 default: break;
546 }
547 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
548 break;
549 }
550 case ISD::INTRINSIC_WO_CHAIN: {
551 unsigned IntrinsicID =
552 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
553 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000554 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000555 switch(IntrinsicID) {
556 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
557 case AMDGPUIntrinsic::R600_load_input: {
558 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
559 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Vincent Lejeuned3fcb502013-05-17 16:51:06 +0000560 MachineFunction &MF = DAG.getMachineFunction();
561 MachineRegisterInfo &MRI = MF.getRegInfo();
562 MRI.addLiveIn(Reg);
563 return DAG.getCopyFromReg(DAG.getEntryNode(),
Andrew Trickef9de2a2013-05-25 02:42:55 +0000564 SDLoc(DAG.getEntryNode()), Reg, VT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000565 }
Tom Stellard41afe6a2013-02-05 17:09:14 +0000566
567 case AMDGPUIntrinsic::R600_interp_input: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000568 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Tom Stellard41afe6a2013-02-05 17:09:14 +0000569 int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
570 MachineSDNode *interp;
571 if (ijb < 0) {
Bill Wendling37e9adb2013-06-07 20:28:55 +0000572 const MachineFunction &MF = DAG.getMachineFunction();
573 const R600InstrInfo *TII =
574 static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
Tom Stellard41afe6a2013-02-05 17:09:14 +0000575 interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
576 MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
577 return DAG.getTargetExtractSubreg(
578 TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
579 DL, MVT::f32, SDValue(interp, 0));
580 }
581
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000582 MachineFunction &MF = DAG.getMachineFunction();
583 MachineRegisterInfo &MRI = MF.getRegInfo();
584 unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
585 unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
586 MRI.addLiveIn(RegisterI);
587 MRI.addLiveIn(RegisterJ);
588 SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
589 SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
590 SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
591 SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);
592
Tom Stellard41afe6a2013-02-05 17:09:14 +0000593 if (slot % 4 < 2)
594 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
595 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000596 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000597 else
598 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
599 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000600 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000601 return SDValue(interp, slot % 2);
Tom Stellard75aadc22012-12-11 21:25:42 +0000602 }
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000603 case AMDGPUIntrinsic::R600_tex:
604 case AMDGPUIntrinsic::R600_texc:
605 case AMDGPUIntrinsic::R600_txl:
606 case AMDGPUIntrinsic::R600_txlc:
607 case AMDGPUIntrinsic::R600_txb:
608 case AMDGPUIntrinsic::R600_txbc:
609 case AMDGPUIntrinsic::R600_txf:
610 case AMDGPUIntrinsic::R600_txq:
611 case AMDGPUIntrinsic::R600_ddx:
612 case AMDGPUIntrinsic::R600_ddy: {
613 unsigned TextureOp;
614 switch (IntrinsicID) {
615 case AMDGPUIntrinsic::R600_tex:
616 TextureOp = 0;
617 break;
618 case AMDGPUIntrinsic::R600_texc:
619 TextureOp = 1;
620 break;
621 case AMDGPUIntrinsic::R600_txl:
622 TextureOp = 2;
623 break;
624 case AMDGPUIntrinsic::R600_txlc:
625 TextureOp = 3;
626 break;
627 case AMDGPUIntrinsic::R600_txb:
628 TextureOp = 4;
629 break;
630 case AMDGPUIntrinsic::R600_txbc:
631 TextureOp = 5;
632 break;
633 case AMDGPUIntrinsic::R600_txf:
634 TextureOp = 6;
635 break;
636 case AMDGPUIntrinsic::R600_txq:
637 TextureOp = 7;
638 break;
639 case AMDGPUIntrinsic::R600_ddx:
640 TextureOp = 8;
641 break;
642 case AMDGPUIntrinsic::R600_ddy:
643 TextureOp = 9;
644 break;
645 default:
646 llvm_unreachable("Unknow Texture Operation");
647 }
648
649 SDValue TexArgs[19] = {
650 DAG.getConstant(TextureOp, MVT::i32),
651 Op.getOperand(1),
652 DAG.getConstant(0, MVT::i32),
653 DAG.getConstant(1, MVT::i32),
654 DAG.getConstant(2, MVT::i32),
655 DAG.getConstant(3, MVT::i32),
656 Op.getOperand(2),
657 Op.getOperand(3),
658 Op.getOperand(4),
659 DAG.getConstant(0, MVT::i32),
660 DAG.getConstant(1, MVT::i32),
661 DAG.getConstant(2, MVT::i32),
662 DAG.getConstant(3, MVT::i32),
663 Op.getOperand(5),
664 Op.getOperand(6),
665 Op.getOperand(7),
666 Op.getOperand(8),
667 Op.getOperand(9),
668 Op.getOperand(10)
669 };
670 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
671 }
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000672 case AMDGPUIntrinsic::AMDGPU_dp4: {
673 SDValue Args[8] = {
674 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
675 DAG.getConstant(0, MVT::i32)),
676 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
677 DAG.getConstant(0, MVT::i32)),
678 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
679 DAG.getConstant(1, MVT::i32)),
680 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
681 DAG.getConstant(1, MVT::i32)),
682 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
683 DAG.getConstant(2, MVT::i32)),
684 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
685 DAG.getConstant(2, MVT::i32)),
686 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
687 DAG.getConstant(3, MVT::i32)),
688 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
689 DAG.getConstant(3, MVT::i32))
690 };
691 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8);
692 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000693
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000694 case Intrinsic::r600_read_ngroups_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000695 return LowerImplicitParameter(DAG, VT, DL, 0);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000696 case Intrinsic::r600_read_ngroups_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000697 return LowerImplicitParameter(DAG, VT, DL, 1);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000698 case Intrinsic::r600_read_ngroups_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000699 return LowerImplicitParameter(DAG, VT, DL, 2);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000700 case Intrinsic::r600_read_global_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000701 return LowerImplicitParameter(DAG, VT, DL, 3);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000702 case Intrinsic::r600_read_global_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000703 return LowerImplicitParameter(DAG, VT, DL, 4);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000704 case Intrinsic::r600_read_global_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000705 return LowerImplicitParameter(DAG, VT, DL, 5);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000706 case Intrinsic::r600_read_local_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000707 return LowerImplicitParameter(DAG, VT, DL, 6);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000708 case Intrinsic::r600_read_local_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000709 return LowerImplicitParameter(DAG, VT, DL, 7);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000710 case Intrinsic::r600_read_local_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000711 return LowerImplicitParameter(DAG, VT, DL, 8);
712
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000713 case Intrinsic::r600_read_tgid_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000714 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
715 AMDGPU::T1_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000716 case Intrinsic::r600_read_tgid_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000717 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
718 AMDGPU::T1_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000719 case Intrinsic::r600_read_tgid_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000720 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
721 AMDGPU::T1_Z, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000722 case Intrinsic::r600_read_tidig_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000723 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
724 AMDGPU::T0_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000725 case Intrinsic::r600_read_tidig_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000726 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
727 AMDGPU::T0_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000728 case Intrinsic::r600_read_tidig_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000729 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
730 AMDGPU::T0_Z, VT);
731 }
732 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
733 break;
734 }
735 } // end switch(Op.getOpcode())
736 return SDValue();
737}
738
739void R600TargetLowering::ReplaceNodeResults(SDNode *N,
740 SmallVectorImpl<SDValue> &Results,
741 SelectionDAG &DAG) const {
742 switch (N->getOpcode()) {
743 default: return;
744 case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
Tom Stellard365366f2013-01-23 02:09:06 +0000745 return;
746 case ISD::LOAD: {
747 SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
748 Results.push_back(SDValue(Node, 0));
749 Results.push_back(SDValue(Node, 1));
750 // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
751 // function
752 DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
753 return;
754 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000755 case ISD::STORE:
756 SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
757 Results.push_back(SDValue(Node, 0));
758 return;
Tom Stellard75aadc22012-12-11 21:25:42 +0000759 }
760}
761
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000762SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
763 // On hw >= R700, COS/SIN input must be between -1. and 1.
764 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
765 EVT VT = Op.getValueType();
766 SDValue Arg = Op.getOperand(0);
767 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
768 DAG.getNode(ISD::FADD, SDLoc(Op), VT,
769 DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
770 DAG.getConstantFP(0.15915494309, MVT::f32)),
771 DAG.getConstantFP(0.5, MVT::f32)));
772 unsigned TrigNode;
773 switch (Op.getOpcode()) {
774 case ISD::FCOS:
775 TrigNode = AMDGPUISD::COS_HW;
776 break;
777 case ISD::FSIN:
778 TrigNode = AMDGPUISD::SIN_HW;
779 break;
780 default:
781 llvm_unreachable("Wrong trig opcode");
782 }
783 SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
784 DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
785 DAG.getConstantFP(-0.5, MVT::f32)));
786 if (Gen >= AMDGPUSubtarget::R700)
787 return TrigVal;
788 // On R600 hw, COS/SIN input must be between -Pi and Pi.
789 return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
790 DAG.getConstantFP(3.14159265359, MVT::f32));
791}
792
Tom Stellard75aadc22012-12-11 21:25:42 +0000793SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
794 return DAG.getNode(
795 ISD::SETCC,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000796 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +0000797 MVT::i1,
798 Op, DAG.getConstantFP(0.0f, MVT::f32),
799 DAG.getCondCode(ISD::SETNE)
800 );
801}
802
Tom Stellard75aadc22012-12-11 21:25:42 +0000803SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000804 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +0000805 unsigned DwordOffset) const {
806 unsigned ByteOffset = DwordOffset * 4;
807 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +0000808 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +0000809
810 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
811 assert(isInt<16>(ByteOffset));
812
813 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
814 DAG.getConstant(ByteOffset, MVT::i32), // PTR
815 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
816 false, false, false, 0);
817}
818
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000819SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
820
821 MachineFunction &MF = DAG.getMachineFunction();
822 const AMDGPUFrameLowering *TFL =
823 static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
824
825 FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
826 assert(FIN);
827
828 unsigned FrameIndex = FIN->getIndex();
829 unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
830 return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
831}
832
Tom Stellard75aadc22012-12-11 21:25:42 +0000833bool R600TargetLowering::isZero(SDValue Op) const {
834 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
835 return Cst->isNullValue();
836 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
837 return CstFP->isZero();
838 } else {
839 return false;
840 }
841}
842
843SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +0000844 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000845 EVT VT = Op.getValueType();
846
847 SDValue LHS = Op.getOperand(0);
848 SDValue RHS = Op.getOperand(1);
849 SDValue True = Op.getOperand(2);
850 SDValue False = Op.getOperand(3);
851 SDValue CC = Op.getOperand(4);
852 SDValue Temp;
853
854 // LHS and RHS are guaranteed to be the same value type
855 EVT CompareVT = LHS.getValueType();
856
857 // Check if we can lower this to a native operation.
858
Tom Stellard2add82d2013-03-08 15:37:09 +0000859 // Try to lower to a SET* instruction:
860 //
861 // SET* can match the following patterns:
862 //
Tom Stellardcd428182013-09-28 02:50:38 +0000863 // select_cc f32, f32, -1, 0, cc_supported
864 // select_cc f32, f32, 1.0f, 0.0f, cc_supported
865 // select_cc i32, i32, -1, 0, cc_supported
Tom Stellard2add82d2013-03-08 15:37:09 +0000866 //
867
868 // Move hardware True/False values to the correct operand.
Tom Stellardcd428182013-09-28 02:50:38 +0000869 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
870 ISD::CondCode InverseCC =
871 ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
Tom Stellard5694d302013-09-28 02:50:43 +0000872 if (isHWTrueValue(False) && isHWFalseValue(True)) {
873 if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
874 std::swap(False, True);
875 CC = DAG.getCondCode(InverseCC);
876 } else {
877 ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
878 if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
879 std::swap(False, True);
880 std::swap(LHS, RHS);
881 CC = DAG.getCondCode(SwapInvCC);
882 }
883 }
Tom Stellard2add82d2013-03-08 15:37:09 +0000884 }
885
886 if (isHWTrueValue(True) && isHWFalseValue(False) &&
887 (CompareVT == VT || VT == MVT::i32)) {
888 // This can be matched by a SET* instruction.
889 return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
890 }
891
Tom Stellard75aadc22012-12-11 21:25:42 +0000892 // Try to lower to a CND* instruction:
Tom Stellard2add82d2013-03-08 15:37:09 +0000893 //
894 // CND* can match the following patterns:
895 //
Tom Stellardcd428182013-09-28 02:50:38 +0000896 // select_cc f32, 0.0, f32, f32, cc_supported
897 // select_cc f32, 0.0, i32, i32, cc_supported
898 // select_cc i32, 0, f32, f32, cc_supported
899 // select_cc i32, 0, i32, i32, cc_supported
Tom Stellard2add82d2013-03-08 15:37:09 +0000900 //
Tom Stellardcd428182013-09-28 02:50:38 +0000901
902 // Try to move the zero value to the RHS
903 if (isZero(LHS)) {
904 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
905 // Try swapping the operands
906 ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
907 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
908 std::swap(LHS, RHS);
909 CC = DAG.getCondCode(CCSwapped);
910 } else {
911 // Try inverting the conditon and then swapping the operands
912 ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
913 CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
914 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
915 std::swap(True, False);
916 std::swap(LHS, RHS);
917 CC = DAG.getCondCode(CCSwapped);
918 }
919 }
920 }
921 if (isZero(RHS)) {
922 SDValue Cond = LHS;
923 SDValue Zero = RHS;
Tom Stellard75aadc22012-12-11 21:25:42 +0000924 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
925 if (CompareVT != VT) {
926 // Bitcast True / False to the correct types. This will end up being
927 // a nop, but it allows us to define only a single pattern in the
928 // .TD files for each CND* instruction rather than having to have
929 // one pattern for integer True/False and one for fp True/False
930 True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
931 False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
932 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000933
934 switch (CCOpcode) {
935 case ISD::SETONE:
936 case ISD::SETUNE:
937 case ISD::SETNE:
Tom Stellard75aadc22012-12-11 21:25:42 +0000938 CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
939 Temp = True;
940 True = False;
941 False = Temp;
942 break;
943 default:
944 break;
945 }
946 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
947 Cond, Zero,
948 True, False,
949 DAG.getCondCode(CCOpcode));
950 return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
951 }
952
Tom Stellard75aadc22012-12-11 21:25:42 +0000953
954 // Possible Min/Max pattern
955 SDValue MinMax = LowerMinMax(Op, DAG);
956 if (MinMax.getNode()) {
957 return MinMax;
958 }
959
960 // If we make it this for it means we have no native instructions to handle
961 // this SELECT_CC, so we must lower it.
962 SDValue HWTrue, HWFalse;
963
964 if (CompareVT == MVT::f32) {
965 HWTrue = DAG.getConstantFP(1.0f, CompareVT);
966 HWFalse = DAG.getConstantFP(0.0f, CompareVT);
967 } else if (CompareVT == MVT::i32) {
968 HWTrue = DAG.getConstant(-1, CompareVT);
969 HWFalse = DAG.getConstant(0, CompareVT);
970 }
971 else {
972 assert(!"Unhandled value type in LowerSELECT_CC");
973 }
974
975 // Lower this unsupported SELECT_CC into a combination of two supported
976 // SELECT_CC operations.
977 SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
978
979 return DAG.getNode(ISD::SELECT_CC, DL, VT,
980 Cond, HWFalse,
981 True, False,
982 DAG.getCondCode(ISD::SETNE));
983}
984
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000985/// LLVM generates byte-addresed pointers. For indirect addressing, we need to
986/// convert these pointers to a register index. Each register holds
987/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
988/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
989/// for indirect addressing.
990SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
991 unsigned StackWidth,
992 SelectionDAG &DAG) const {
993 unsigned SRLPad;
994 switch(StackWidth) {
995 case 1:
996 SRLPad = 2;
997 break;
998 case 2:
999 SRLPad = 3;
1000 break;
1001 case 4:
1002 SRLPad = 4;
1003 break;
1004 default: llvm_unreachable("Invalid stack width");
1005 }
1006
Andrew Trickef9de2a2013-05-25 02:42:55 +00001007 return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001008 DAG.getConstant(SRLPad, MVT::i32));
1009}
1010
1011void R600TargetLowering::getStackAddress(unsigned StackWidth,
1012 unsigned ElemIdx,
1013 unsigned &Channel,
1014 unsigned &PtrIncr) const {
1015 switch (StackWidth) {
1016 default:
1017 case 1:
1018 Channel = 0;
1019 if (ElemIdx > 0) {
1020 PtrIncr = 1;
1021 } else {
1022 PtrIncr = 0;
1023 }
1024 break;
1025 case 2:
1026 Channel = ElemIdx % 2;
1027 if (ElemIdx == 2) {
1028 PtrIncr = 1;
1029 } else {
1030 PtrIncr = 0;
1031 }
1032 break;
1033 case 4:
1034 Channel = ElemIdx;
1035 PtrIncr = 0;
1036 break;
1037 }
1038}
1039
Tom Stellard75aadc22012-12-11 21:25:42 +00001040SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001041 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001042 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1043 SDValue Chain = Op.getOperand(0);
1044 SDValue Value = Op.getOperand(1);
1045 SDValue Ptr = Op.getOperand(2);
1046
Tom Stellard2ffc3302013-08-26 15:05:44 +00001047 SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
Tom Stellardfbab8272013-08-16 01:12:11 +00001048 if (Result.getNode()) {
1049 return Result;
1050 }
1051
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001052 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
1053 if (StoreNode->isTruncatingStore()) {
1054 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001055 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001056 EVT MemVT = StoreNode->getMemoryVT();
1057 SDValue MaskConstant;
1058 if (MemVT == MVT::i8) {
1059 MaskConstant = DAG.getConstant(0xFF, MVT::i32);
1060 } else {
1061 assert(MemVT == MVT::i16);
1062 MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
1063 }
1064 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
1065 DAG.getConstant(2, MVT::i32));
1066 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
1067 DAG.getConstant(0x00000003, VT));
1068 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1069 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
1070 DAG.getConstant(3, VT));
1071 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1072 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
1073 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1074 // vector instead.
1075 SDValue Src[4] = {
1076 ShiftedValue,
1077 DAG.getConstant(0, MVT::i32),
1078 DAG.getConstant(0, MVT::i32),
1079 Mask
1080 };
1081 SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src, 4);
1082 SDValue Args[3] = { Chain, Input, DWordAddr };
1083 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
1084 Op->getVTList(), Args, 3, MemVT,
1085 StoreNode->getMemOperand());
1086 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
1087 Value.getValueType().bitsGE(MVT::i32)) {
1088 // Convert pointer from byte address to dword address.
1089 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1090 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
1091 Ptr, DAG.getConstant(2, MVT::i32)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001092
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001093 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
1094 assert(!"Truncated and indexed stores not supported yet");
1095 } else {
1096 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1097 }
1098 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001099 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001100 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001101
1102 EVT ValueVT = Value.getValueType();
1103
1104 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1105 return SDValue();
1106 }
1107
1108 // Lowering for indirect addressing
1109
1110 const MachineFunction &MF = DAG.getMachineFunction();
1111 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1112 getTargetMachine().getFrameLowering());
1113 unsigned StackWidth = TFL->getStackWidth(MF);
1114
1115 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1116
1117 if (ValueVT.isVector()) {
1118 unsigned NumElemVT = ValueVT.getVectorNumElements();
1119 EVT ElemVT = ValueVT.getVectorElementType();
1120 SDValue Stores[4];
1121
1122 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1123 "vector width in load");
1124
1125 for (unsigned i = 0; i < NumElemVT; ++i) {
1126 unsigned Channel, PtrIncr;
1127 getStackAddress(StackWidth, i, Channel, PtrIncr);
1128 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1129 DAG.getConstant(PtrIncr, MVT::i32));
1130 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
1131 Value, DAG.getConstant(i, MVT::i32));
1132
1133 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1134 Chain, Elem, Ptr,
1135 DAG.getTargetConstant(Channel, MVT::i32));
1136 }
1137 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
1138 } else {
1139 if (ValueVT == MVT::i8) {
1140 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1141 }
1142 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
NAKAMURA Takumi18ca09c2013-05-22 06:37:25 +00001143 DAG.getTargetConstant(0, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001144 }
1145
1146 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001147}
1148
Tom Stellard365366f2013-01-23 02:09:06 +00001149// return (512 + (kc_bank << 12)
1150static int
1151ConstantAddressBlock(unsigned AddressSpace) {
1152 switch (AddressSpace) {
1153 case AMDGPUAS::CONSTANT_BUFFER_0:
1154 return 512;
1155 case AMDGPUAS::CONSTANT_BUFFER_1:
1156 return 512 + 4096;
1157 case AMDGPUAS::CONSTANT_BUFFER_2:
1158 return 512 + 4096 * 2;
1159 case AMDGPUAS::CONSTANT_BUFFER_3:
1160 return 512 + 4096 * 3;
1161 case AMDGPUAS::CONSTANT_BUFFER_4:
1162 return 512 + 4096 * 4;
1163 case AMDGPUAS::CONSTANT_BUFFER_5:
1164 return 512 + 4096 * 5;
1165 case AMDGPUAS::CONSTANT_BUFFER_6:
1166 return 512 + 4096 * 6;
1167 case AMDGPUAS::CONSTANT_BUFFER_7:
1168 return 512 + 4096 * 7;
1169 case AMDGPUAS::CONSTANT_BUFFER_8:
1170 return 512 + 4096 * 8;
1171 case AMDGPUAS::CONSTANT_BUFFER_9:
1172 return 512 + 4096 * 9;
1173 case AMDGPUAS::CONSTANT_BUFFER_10:
1174 return 512 + 4096 * 10;
1175 case AMDGPUAS::CONSTANT_BUFFER_11:
1176 return 512 + 4096 * 11;
1177 case AMDGPUAS::CONSTANT_BUFFER_12:
1178 return 512 + 4096 * 12;
1179 case AMDGPUAS::CONSTANT_BUFFER_13:
1180 return 512 + 4096 * 13;
1181 case AMDGPUAS::CONSTANT_BUFFER_14:
1182 return 512 + 4096 * 14;
1183 case AMDGPUAS::CONSTANT_BUFFER_15:
1184 return 512 + 4096 * 15;
1185 default:
1186 return -1;
1187 }
1188}
1189
1190SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
1191{
1192 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001193 SDLoc DL(Op);
Tom Stellard365366f2013-01-23 02:09:06 +00001194 LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
1195 SDValue Chain = Op.getOperand(0);
1196 SDValue Ptr = Op.getOperand(1);
1197 SDValue LoweredLoad;
1198
Tom Stellard35bb18c2013-08-26 15:06:04 +00001199 if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
1200 SDValue MergedValues[2] = {
1201 SplitVectorLoad(Op, DAG),
1202 Chain
1203 };
1204 return DAG.getMergeValues(MergedValues, 2, DL);
1205 }
1206
Tom Stellard365366f2013-01-23 02:09:06 +00001207 int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
1208 if (ConstantBlock > -1) {
1209 SDValue Result;
1210 if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
Vincent Lejeune743dca02013-03-05 15:04:29 +00001211 dyn_cast<Constant>(LoadNode->getSrcValue()) ||
1212 dyn_cast<ConstantSDNode>(Ptr)) {
Tom Stellard365366f2013-01-23 02:09:06 +00001213 SDValue Slots[4];
1214 for (unsigned i = 0; i < 4; i++) {
1215 // We want Const position encoded with the following formula :
1216 // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
1217 // const_index is Ptr computed by llvm using an alignment of 16.
1218 // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
1219 // then div by 4 at the ISel step
1220 SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
1221 DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
1222 Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
1223 }
Tom Stellard0344cdf2013-08-01 15:23:42 +00001224 EVT NewVT = MVT::v4i32;
1225 unsigned NumElements = 4;
1226 if (VT.isVector()) {
1227 NewVT = VT;
1228 NumElements = VT.getVectorNumElements();
1229 }
1230 Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT, Slots, NumElements);
Tom Stellard365366f2013-01-23 02:09:06 +00001231 } else {
1232 // non constant ptr cant be folded, keeps it as a v4f32 load
1233 Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
Vincent Lejeune743dca02013-03-05 15:04:29 +00001234 DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
Christian Konig189357c2013-03-07 09:03:59 +00001235 DAG.getConstant(LoadNode->getAddressSpace() -
NAKAMURA Takumi18ca09c2013-05-22 06:37:25 +00001236 AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
Tom Stellard365366f2013-01-23 02:09:06 +00001237 );
1238 }
1239
1240 if (!VT.isVector()) {
1241 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
1242 DAG.getConstant(0, MVT::i32));
1243 }
1244
1245 SDValue MergedValues[2] = {
1246 Result,
1247 Chain
1248 };
1249 return DAG.getMergeValues(MergedValues, 2, DL);
1250 }
1251
Tom Stellard84021442013-07-23 01:48:24 +00001252 // For most operations returning SDValue() will result int he node being
1253 // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so
1254 // we need to manually expand loads that may be legal in some address spaces
1255 // and illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported
1256 // for compute shaders, since the data is sign extended when it is uploaded
1257 // to the buffer. Howerver SEXT loads from other addresspaces are not
1258 // supported, so we need to expand them here.
1259 if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
1260 EVT MemVT = LoadNode->getMemoryVT();
1261 assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
1262 SDValue ShiftAmount =
1263 DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
1264 SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
1265 LoadNode->getPointerInfo(), MemVT,
1266 LoadNode->isVolatile(),
1267 LoadNode->isNonTemporal(),
1268 LoadNode->getAlignment());
1269 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
1270 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);
1271
1272 SDValue MergedValues[2] = { Sra, Chain };
1273 return DAG.getMergeValues(MergedValues, 2, DL);
1274 }
1275
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001276 if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1277 return SDValue();
1278 }
1279
1280 // Lowering for indirect addressing
1281 const MachineFunction &MF = DAG.getMachineFunction();
1282 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1283 getTargetMachine().getFrameLowering());
1284 unsigned StackWidth = TFL->getStackWidth(MF);
1285
1286 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1287
1288 if (VT.isVector()) {
1289 unsigned NumElemVT = VT.getVectorNumElements();
1290 EVT ElemVT = VT.getVectorElementType();
1291 SDValue Loads[4];
1292
1293 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1294 "vector width in load");
1295
1296 for (unsigned i = 0; i < NumElemVT; ++i) {
1297 unsigned Channel, PtrIncr;
1298 getStackAddress(StackWidth, i, Channel, PtrIncr);
1299 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1300 DAG.getConstant(PtrIncr, MVT::i32));
1301 Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
1302 Chain, Ptr,
1303 DAG.getTargetConstant(Channel, MVT::i32),
1304 Op.getOperand(2));
1305 }
1306 for (unsigned i = NumElemVT; i < 4; ++i) {
1307 Loads[i] = DAG.getUNDEF(ElemVT);
1308 }
1309 EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
1310 LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
1311 } else {
1312 LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
1313 Chain, Ptr,
1314 DAG.getTargetConstant(0, MVT::i32), // Channel
1315 Op.getOperand(2));
1316 }
1317
1318 SDValue Ops[2];
1319 Ops[0] = LoweredLoad;
1320 Ops[1] = Chain;
1321
1322 return DAG.getMergeValues(Ops, 2, DL);
Tom Stellard365366f2013-01-23 02:09:06 +00001323}
Tom Stellard75aadc22012-12-11 21:25:42 +00001324
Tom Stellard75aadc22012-12-11 21:25:42 +00001325/// XXX Only kernel functions are supported, so we can assume for now that
1326/// every function is a kernel function, but in the future we should use
1327/// separate calling conventions for kernel and non-kernel functions.
1328SDValue R600TargetLowering::LowerFormalArguments(
1329 SDValue Chain,
1330 CallingConv::ID CallConv,
1331 bool isVarArg,
1332 const SmallVectorImpl<ISD::InputArg> &Ins,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001333 SDLoc DL, SelectionDAG &DAG,
Tom Stellard75aadc22012-12-11 21:25:42 +00001334 SmallVectorImpl<SDValue> &InVals) const {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001335 SmallVector<CCValAssign, 16> ArgLocs;
1336 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
1337 getTargetMachine(), ArgLocs, *DAG.getContext());
1338
1339 AnalyzeFormalArguments(CCInfo, Ins);
1340
Tom Stellard1e803092013-07-23 01:48:18 +00001341 for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001342 CCValAssign &VA = ArgLocs[i];
1343 EVT VT = VA.getLocVT();
Tom Stellard78e01292013-07-23 01:47:58 +00001344
Tom Stellard75aadc22012-12-11 21:25:42 +00001345 PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +00001346 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellardacfeebf2013-07-23 01:48:05 +00001347
1348 // The first 36 bytes of the input buffer contains information about
1349 // thread group and global sizes.
Tom Stellard1e803092013-07-23 01:48:18 +00001350 SDValue Arg = DAG.getLoad(VT, DL, Chain,
Tom Stellardacfeebf2013-07-23 01:48:05 +00001351 DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
Tom Stellard1e803092013-07-23 01:48:18 +00001352 MachinePointerInfo(UndefValue::get(PtrTy)), false,
1353 false, false, 4); // 4 is the prefered alignment for
1354 // the CONSTANT memory space.
Tom Stellard75aadc22012-12-11 21:25:42 +00001355 InVals.push_back(Arg);
Tom Stellard75aadc22012-12-11 21:25:42 +00001356 }
1357 return Chain;
1358}
1359
Matt Arsenault758659232013-05-18 00:21:46 +00001360EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Tom Stellard75aadc22012-12-11 21:25:42 +00001361 if (!VT.isVector()) return MVT::i32;
1362 return VT.changeVectorElementTypeToInteger();
1363}
1364
Benjamin Kramer193960c2013-06-11 13:32:25 +00001365static SDValue
1366CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
1367 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001368 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1369 assert(RemapSwizzle.empty());
1370 SDValue NewBldVec[4] = {
1371 VectorEntry.getOperand(0),
1372 VectorEntry.getOperand(1),
1373 VectorEntry.getOperand(2),
1374 VectorEntry.getOperand(3)
1375 };
1376
1377 for (unsigned i = 0; i < 4; i++) {
1378 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1379 if (C->isZero()) {
1380 RemapSwizzle[i] = 4; // SEL_0
1381 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1382 } else if (C->isExactlyValue(1.0)) {
1383 RemapSwizzle[i] = 5; // SEL_1
1384 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1385 }
1386 }
1387
1388 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1389 continue;
1390 for (unsigned j = 0; j < i; j++) {
1391 if (NewBldVec[i] == NewBldVec[j]) {
1392 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1393 RemapSwizzle[i] = j;
1394 break;
1395 }
1396 }
1397 }
1398
1399 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
1400 VectorEntry.getValueType(), NewBldVec, 4);
1401}
1402
Benjamin Kramer193960c2013-06-11 13:32:25 +00001403static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1404 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001405 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1406 assert(RemapSwizzle.empty());
1407 SDValue NewBldVec[4] = {
1408 VectorEntry.getOperand(0),
1409 VectorEntry.getOperand(1),
1410 VectorEntry.getOperand(2),
1411 VectorEntry.getOperand(3)
1412 };
1413 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001414 for (unsigned i = 0; i < 4; i++)
1415 RemapSwizzle[i] = i;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001416
1417 for (unsigned i = 0; i < 4; i++) {
1418 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1419 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1420 ->getZExtValue();
1421 if (!isUnmovable[Idx]) {
1422 // Swap i and Idx
1423 std::swap(NewBldVec[Idx], NewBldVec[i]);
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001424 std::swap(RemapSwizzle[RemapSwizzle[Idx]], RemapSwizzle[RemapSwizzle[i]]);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001425 }
1426 isUnmovable[Idx] = true;
1427 }
1428 }
1429
1430 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
1431 VectorEntry.getValueType(), NewBldVec, 4);
1432}
1433
1434
1435SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1436SDValue Swz[4], SelectionDAG &DAG) const {
1437 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1438 // Old -> New swizzle values
1439 DenseMap<unsigned, unsigned> SwizzleRemap;
1440
1441 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1442 for (unsigned i = 0; i < 4; i++) {
1443 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1444 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1445 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1446 }
1447
1448 SwizzleRemap.clear();
1449 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1450 for (unsigned i = 0; i < 4; i++) {
1451 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1452 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1453 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1454 }
1455
1456 return BuildVector;
1457}
1458
1459
Tom Stellard75aadc22012-12-11 21:25:42 +00001460//===----------------------------------------------------------------------===//
1461// Custom DAG Optimizations
1462//===----------------------------------------------------------------------===//
1463
1464SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1465 DAGCombinerInfo &DCI) const {
1466 SelectionDAG &DAG = DCI.DAG;
1467
1468 switch (N->getOpcode()) {
1469 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1470 case ISD::FP_ROUND: {
1471 SDValue Arg = N->getOperand(0);
1472 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001473 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001474 Arg.getOperand(0));
1475 }
1476 break;
1477 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001478
1479 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1480 // (i32 select_cc f32, f32, -1, 0 cc)
1481 //
1482 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1483 // this to one of the SET*_DX10 instructions.
1484 case ISD::FP_TO_SINT: {
1485 SDValue FNeg = N->getOperand(0);
1486 if (FNeg.getOpcode() != ISD::FNEG) {
1487 return SDValue();
1488 }
1489 SDValue SelectCC = FNeg.getOperand(0);
1490 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1491 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1492 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1493 !isHWTrueValue(SelectCC.getOperand(2)) ||
1494 !isHWFalseValue(SelectCC.getOperand(3))) {
1495 return SDValue();
1496 }
1497
Andrew Trickef9de2a2013-05-25 02:42:55 +00001498 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001499 SelectCC.getOperand(0), // LHS
1500 SelectCC.getOperand(1), // RHS
1501 DAG.getConstant(-1, MVT::i32), // True
1502 DAG.getConstant(0, MVT::i32), // Flase
1503 SelectCC.getOperand(4)); // CC
1504
1505 break;
1506 }
Quentin Colombete2e05482013-07-30 00:27:16 +00001507
1508 // insert_vector_elt (build_vector elt0, …, eltN), NewEltIdx, idx
1509 // => build_vector elt0, …, NewEltIdx, …, eltN
1510 case ISD::INSERT_VECTOR_ELT: {
1511 SDValue InVec = N->getOperand(0);
1512 SDValue InVal = N->getOperand(1);
1513 SDValue EltNo = N->getOperand(2);
1514 SDLoc dl(N);
1515
1516 // If the inserted element is an UNDEF, just use the input vector.
1517 if (InVal.getOpcode() == ISD::UNDEF)
1518 return InVec;
1519
1520 EVT VT = InVec.getValueType();
1521
1522 // If we can't generate a legal BUILD_VECTOR, exit
1523 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1524 return SDValue();
1525
1526 // Check that we know which element is being inserted
1527 if (!isa<ConstantSDNode>(EltNo))
1528 return SDValue();
1529 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1530
1531 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1532 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1533 // vector elements.
1534 SmallVector<SDValue, 8> Ops;
1535 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1536 Ops.append(InVec.getNode()->op_begin(),
1537 InVec.getNode()->op_end());
1538 } else if (InVec.getOpcode() == ISD::UNDEF) {
1539 unsigned NElts = VT.getVectorNumElements();
1540 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1541 } else {
1542 return SDValue();
1543 }
1544
1545 // Insert the element
1546 if (Elt < Ops.size()) {
1547 // All the operands of BUILD_VECTOR must have the same type;
1548 // we enforce that here.
1549 EVT OpVT = Ops[0].getValueType();
1550 if (InVal.getValueType() != OpVT)
1551 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1552 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
1553 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
1554 Ops[Elt] = InVal;
1555 }
1556
1557 // Return the new vector
1558 return DAG.getNode(ISD::BUILD_VECTOR, dl,
1559 VT, &Ops[0], Ops.size());
1560 }
1561
Tom Stellard365366f2013-01-23 02:09:06 +00001562 // Extract_vec (Build_vector) generated by custom lowering
1563 // also needs to be customly combined
1564 case ISD::EXTRACT_VECTOR_ELT: {
1565 SDValue Arg = N->getOperand(0);
1566 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1567 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1568 unsigned Element = Const->getZExtValue();
1569 return Arg->getOperand(Element);
1570 }
1571 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001572 if (Arg.getOpcode() == ISD::BITCAST &&
1573 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1574 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1575 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001576 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00001577 Arg->getOperand(0).getOperand(Element));
1578 }
1579 }
Tom Stellard365366f2013-01-23 02:09:06 +00001580 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001581
1582 case ISD::SELECT_CC: {
1583 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1584 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001585 //
1586 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1587 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001588 SDValue LHS = N->getOperand(0);
1589 if (LHS.getOpcode() != ISD::SELECT_CC) {
1590 return SDValue();
1591 }
1592
1593 SDValue RHS = N->getOperand(1);
1594 SDValue True = N->getOperand(2);
1595 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001596 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001597
1598 if (LHS.getOperand(2).getNode() != True.getNode() ||
1599 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00001600 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00001601 return SDValue();
1602 }
1603
Tom Stellard5e524892013-03-08 15:37:11 +00001604 switch (NCC) {
1605 default: return SDValue();
1606 case ISD::SETNE: return LHS;
1607 case ISD::SETEQ: {
1608 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1609 LHSCC = ISD::getSetCCInverse(LHSCC,
1610 LHS.getOperand(0).getValueType().isInteger());
Tom Stellardcd428182013-09-28 02:50:38 +00001611 if (DCI.isBeforeLegalizeOps() ||
1612 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1613 return DAG.getSelectCC(SDLoc(N),
1614 LHS.getOperand(0),
1615 LHS.getOperand(1),
1616 LHS.getOperand(2),
1617 LHS.getOperand(3),
1618 LHSCC);
1619 break;
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001620 }
Tom Stellard5e524892013-03-08 15:37:11 +00001621 }
Tom Stellardcd428182013-09-28 02:50:38 +00001622 return SDValue();
Tom Stellard5e524892013-03-08 15:37:11 +00001623 }
Tom Stellardfbab8272013-08-16 01:12:11 +00001624
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001625 case AMDGPUISD::EXPORT: {
1626 SDValue Arg = N->getOperand(1);
1627 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1628 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001629
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001630 SDValue NewArgs[8] = {
1631 N->getOperand(0), // Chain
1632 SDValue(),
1633 N->getOperand(2), // ArrayBase
1634 N->getOperand(3), // Type
1635 N->getOperand(4), // SWZ_X
1636 N->getOperand(5), // SWZ_Y
1637 N->getOperand(6), // SWZ_Z
1638 N->getOperand(7) // SWZ_W
1639 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00001640 SDLoc DL(N);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001641 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001642 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
Tom Stellarde06163a2013-02-07 14:02:35 +00001643 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001644 case AMDGPUISD::TEXTURE_FETCH: {
1645 SDValue Arg = N->getOperand(1);
1646 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1647 break;
1648
1649 SDValue NewArgs[19] = {
1650 N->getOperand(0),
1651 N->getOperand(1),
1652 N->getOperand(2),
1653 N->getOperand(3),
1654 N->getOperand(4),
1655 N->getOperand(5),
1656 N->getOperand(6),
1657 N->getOperand(7),
1658 N->getOperand(8),
1659 N->getOperand(9),
1660 N->getOperand(10),
1661 N->getOperand(11),
1662 N->getOperand(12),
1663 N->getOperand(13),
1664 N->getOperand(14),
1665 N->getOperand(15),
1666 N->getOperand(16),
1667 N->getOperand(17),
1668 N->getOperand(18),
1669 };
1670 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
1671 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
1672 NewArgs, 19);
1673 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001674 }
1675 return SDValue();
1676}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001677
1678static bool
1679FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001680 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001681 const R600InstrInfo *TII =
1682 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
1683 if (!Src.isMachineOpcode())
1684 return false;
1685 switch (Src.getMachineOpcode()) {
1686 case AMDGPU::FNEG_R600:
1687 if (!Neg.getNode())
1688 return false;
1689 Src = Src.getOperand(0);
1690 Neg = DAG.getTargetConstant(1, MVT::i32);
1691 return true;
1692 case AMDGPU::FABS_R600:
1693 if (!Abs.getNode())
1694 return false;
1695 Src = Src.getOperand(0);
1696 Abs = DAG.getTargetConstant(1, MVT::i32);
1697 return true;
1698 case AMDGPU::CONST_COPY: {
1699 unsigned Opcode = ParentNode->getMachineOpcode();
1700 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1701
1702 if (!Sel.getNode())
1703 return false;
1704
1705 SDValue CstOffset = Src.getOperand(0);
1706 if (ParentNode->getValueType(0).isVector())
1707 return false;
1708
1709 // Gather constants values
1710 int SrcIndices[] = {
1711 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
1712 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
1713 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
1714 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1715 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1716 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1717 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
1718 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
1719 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
1720 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
1721 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
1722 };
1723 std::vector<unsigned> Consts;
1724 for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
1725 int OtherSrcIdx = SrcIndices[i];
1726 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
1727 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
1728 continue;
1729 if (HasDst) {
1730 OtherSrcIdx--;
1731 OtherSelIdx--;
1732 }
1733 if (RegisterSDNode *Reg =
1734 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
1735 if (Reg->getReg() == AMDGPU::ALU_CONST) {
1736 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(
1737 ParentNode->getOperand(OtherSelIdx));
1738 Consts.push_back(Cst->getZExtValue());
1739 }
1740 }
1741 }
1742
1743 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
1744 Consts.push_back(Cst->getZExtValue());
1745 if (!TII->fitsConstReadLimitations(Consts)) {
1746 return false;
1747 }
1748
1749 Sel = CstOffset;
1750 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
1751 return true;
1752 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001753 case AMDGPU::MOV_IMM_I32:
1754 case AMDGPU::MOV_IMM_F32: {
1755 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
1756 uint64_t ImmValue = 0;
1757
1758
1759 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
1760 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
1761 float FloatValue = FPC->getValueAPF().convertToFloat();
1762 if (FloatValue == 0.0) {
1763 ImmReg = AMDGPU::ZERO;
1764 } else if (FloatValue == 0.5) {
1765 ImmReg = AMDGPU::HALF;
1766 } else if (FloatValue == 1.0) {
1767 ImmReg = AMDGPU::ONE;
1768 } else {
1769 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
1770 }
1771 } else {
1772 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
1773 uint64_t Value = C->getZExtValue();
1774 if (Value == 0) {
1775 ImmReg = AMDGPU::ZERO;
1776 } else if (Value == 1) {
1777 ImmReg = AMDGPU::ONE_INT;
1778 } else {
1779 ImmValue = Value;
1780 }
1781 }
1782
1783 // Check that we aren't already using an immediate.
1784 // XXX: It's possible for an instruction to have more than one
1785 // immediate operand, but this is not supported yet.
1786 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
1787 if (!Imm.getNode())
1788 return false;
1789 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
1790 assert(C);
1791 if (C->getZExtValue())
1792 return false;
1793 Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
1794 }
1795 Src = DAG.getRegister(ImmReg, MVT::i32);
1796 return true;
1797 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001798 default:
1799 return false;
1800 }
1801}
1802
1803
1804/// \brief Fold the instructions after selecting them
1805SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
1806 SelectionDAG &DAG) const {
1807 const R600InstrInfo *TII =
1808 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
1809 if (!Node->isMachineOpcode())
1810 return Node;
1811 unsigned Opcode = Node->getMachineOpcode();
1812 SDValue FakeOp;
1813
1814 std::vector<SDValue> Ops;
1815 for(SDNode::op_iterator I = Node->op_begin(), E = Node->op_end();
1816 I != E; ++I)
1817 Ops.push_back(*I);
1818
1819 if (Opcode == AMDGPU::DOT_4) {
1820 int OperandIdx[] = {
1821 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1822 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1823 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1824 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
1825 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
1826 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
1827 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
1828 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
1829 };
1830 int NegIdx[] = {
1831 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
1832 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
1833 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
1834 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
1835 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
1836 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
1837 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
1838 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
1839 };
1840 int AbsIdx[] = {
1841 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
1842 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
1843 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
1844 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
1845 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
1846 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
1847 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
1848 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
1849 };
1850 for (unsigned i = 0; i < 8; i++) {
1851 if (OperandIdx[i] < 0)
1852 return Node;
1853 SDValue &Src = Ops[OperandIdx[i] - 1];
1854 SDValue &Neg = Ops[NegIdx[i] - 1];
1855 SDValue &Abs = Ops[AbsIdx[i] - 1];
1856 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1857 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
1858 if (HasDst)
1859 SelIdx--;
1860 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001861 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
1862 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1863 }
1864 } else if (Opcode == AMDGPU::REG_SEQUENCE) {
1865 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
1866 SDValue &Src = Ops[i];
1867 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001868 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1869 }
Vincent Lejeune0167a312013-09-12 23:45:00 +00001870 } else if (Opcode == AMDGPU::CLAMP_R600) {
1871 SDValue Src = Node->getOperand(0);
1872 if (!Src.isMachineOpcode() ||
1873 !TII->hasInstrModifiers(Src.getMachineOpcode()))
1874 return Node;
1875 int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
1876 AMDGPU::OpName::clamp);
1877 if (ClampIdx < 0)
1878 return Node;
1879 std::vector<SDValue> Ops;
1880 unsigned NumOp = Src.getNumOperands();
1881 for(unsigned i = 0; i < NumOp; ++i)
1882 Ops.push_back(Src.getOperand(i));
1883 Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
1884 return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
1885 Node->getVTList(), Ops);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001886 } else {
1887 if (!TII->hasInstrModifiers(Opcode))
1888 return Node;
1889 int OperandIdx[] = {
1890 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
1891 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
1892 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
1893 };
1894 int NegIdx[] = {
1895 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
1896 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
1897 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
1898 };
1899 int AbsIdx[] = {
1900 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
1901 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
1902 -1
1903 };
1904 for (unsigned i = 0; i < 3; i++) {
1905 if (OperandIdx[i] < 0)
1906 return Node;
1907 SDValue &Src = Ops[OperandIdx[i] - 1];
1908 SDValue &Neg = Ops[NegIdx[i] - 1];
1909 SDValue FakeAbs;
1910 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
1911 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1912 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001913 int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
1914 if (HasDst) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001915 SelIdx--;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001916 ImmIdx--;
1917 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001918 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001919 SDValue &Imm = Ops[ImmIdx];
1920 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001921 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1922 }
1923 }
1924
1925 return Node;
1926}