blob: 81a28be104a6fdd4bb9efb17789f6ec42cb6c368 [file] [log] [blame]
Tom Stellard75aadc22012-12-11 21:25:42 +00001//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Custom DAG lowering for R600
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
16#include "R600Defines.h"
17#include "R600InstrInfo.h"
18#include "R600MachineFunctionInfo.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000019#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000020#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000021#include "llvm/CodeGen/MachineInstrBuilder.h"
22#include "llvm/CodeGen/MachineRegisterInfo.h"
23#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000024#include "llvm/IR/Argument.h"
25#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000026
27using namespace llvm;
28
29R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000030 AMDGPUTargetLowering(TM),
31 Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000032 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
33 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
34 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
35 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000036 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
37 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
38
Tom Stellard75aadc22012-12-11 21:25:42 +000039 computeRegisterProperties();
40
Tom Stellard0351ea22013-09-28 02:50:50 +000041 // Set condition code actions
42 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
43 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000044 setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000045 setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000046 setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
47 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000048 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
49 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
50 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
51 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000052 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
53 setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
54
55 setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
56 setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
57 setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
58 setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
59
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000060 setOperationAction(ISD::FCOS, MVT::f32, Custom);
61 setOperationAction(ISD::FSIN, MVT::f32, Custom);
62
Tom Stellard75aadc22012-12-11 21:25:42 +000063 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000064 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000065
Tom Stellard492ebea2013-03-08 15:37:07 +000066 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
67 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000068
69 setOperationAction(ISD::FSUB, MVT::f32, Expand);
70
71 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
72 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
73 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000074
Tom Stellard75aadc22012-12-11 21:25:42 +000075 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
76 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
77
Tom Stellarde8f9f282013-03-08 15:37:05 +000078 setOperationAction(ISD::SETCC, MVT::i32, Expand);
79 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000080 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
81
Tom Stellard53f2f902013-09-05 18:38:03 +000082 setOperationAction(ISD::SELECT, MVT::i32, Expand);
83 setOperationAction(ISD::SELECT, MVT::f32, Expand);
84 setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
85 setOperationAction(ISD::SELECT, MVT::v2f32, Expand);
86 setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
87 setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000088
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000089 // Legalize loads and stores to the private address space.
90 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +000091 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000092 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellard1e803092013-07-23 01:48:18 +000093 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
94 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
95 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
96 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000097 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000098 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +000099 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000100 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
Tom Stellardd3ee8c12013-08-16 01:12:06 +0000101 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
102 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000103
Tom Stellard365366f2013-01-23 02:09:06 +0000104 setOperationAction(ISD::LOAD, MVT::i32, Custom);
105 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000106 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
107
Tom Stellard75aadc22012-12-11 21:25:42 +0000108 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +0000109 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +0000110 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +0000111 setTargetDAGCombine(ISD::SELECT_CC);
Quentin Colombete2e05482013-07-30 00:27:16 +0000112 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000113
Michel Danzer49812b52013-07-10 16:37:07 +0000114 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
115
Tom Stellardb852af52013-03-08 15:37:03 +0000116 setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard87047f62013-04-24 23:56:18 +0000117 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
Tom Stellardfc455472013-08-12 22:33:21 +0000118 setSchedulingPreference(Sched::Source);
Tom Stellard75aadc22012-12-11 21:25:42 +0000119}
120
/// \brief Expand pseudo instructions that need custom MachineInstr insertion.
///
/// Handles R600 pseudos (flag-carrying moves, immediate materialization,
/// texture sampling with gradients, branches, exports, RAT stores, LDS ops).
/// Unless a case returns early, the original pseudo \p MI is erased after the
/// replacement instructions have been inserted at its position in \p BB.
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // LDS instructions with a dst operand: if the result is actually used,
    // re-emit the instruction defining OQAP and copy OQAP into the original
    // destination register; otherwise switch to the no-return LDS form.
    if (TII->isLDSInstr(MI->getOpcode()) &&
        TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst) != -1) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg())) {
        NewMI = BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()),
                        AMDGPU::OQAP);
        TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV,
                                     MI->getOperand(0).getReg(),
                                     AMDGPU::OQAP);
      } else {
        NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                        TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      }
      // Copy the remaining (source) operands onto the rebuilt instruction.
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      // Not an LDS pseudo; defer to the common AMDGPU inserter.
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  case AMDGPU::CLAMP_R600: {
    // Lower CLAMP to a MOV carrying the clamp instruction flag.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    // Lower FABS to a MOV carrying the absolute-value source flag.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    // Lower FNEG to a MOV carrying the negate source flag.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    // Flag the instruction that defines the masked register so its write is
    // suppressed; MASK_WRITE itself emits nothing.
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    // Materialize an FP immediate from its raw IEEE bit pattern.
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    // Read from the constant file: MOV from ALU_CONST with the constant
    // address encoded in the src0_sel operand.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
                       MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    // The store ends the program if it is immediately followed by RETURN.
    unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    // Texture sample with explicit gradients: emit SET_GRADIENTS_H/V into
    // two temporary 128-bit registers, then the gradient sample reading them
    // as implicit uses.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    // Default coordinate-source swizzle and coordinate-type bits; adjusted
    // per texture target below (e.g. shadow targets move Z into W).
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    // Same expansion as TXD but using the shadow-compare sample opcode
    // (TEX_SAMPLE_C_G) for the final fetch.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    // Unconditional branch lowers to a plain JUMP.
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    // Conditional branch on f32: PRED_X sets PREDICATE_BIT (pushed on the
    // predicate stack), then JUMP_COND consumes and kills it.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    // Same as the f32 case but using the integer not-zero compare.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO_INT)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if its not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = llvm::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // CF instruction value differs per generation: 84 on EG, 40 on R600.
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  MI->eraseFromParent();
  return BB;
}
501
502//===----------------------------------------------------------------------===//
503// Custom DAG Lowering Operations
504//===----------------------------------------------------------------------===//
505
/// \brief Custom-lower DAG operations marked Custom in the constructor.
///
/// Dispatches on opcode: trig, select_cc, load/store, frame index and global
/// address go to dedicated helpers; INTRINSIC_VOID / INTRINSIC_WO_CHAIN are
/// expanded inline here.  Returns an empty SDValue when nothing custom is
/// required for the given node.
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::FCOS:
  case ISD::FSIN: return LowerTrig(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: return LowerLOAD(Op, DAG);
  case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      // Copy the value into the output register and record it as a live-out
      // so the RETURN inserter can add the implicit use.
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MFI->LiveOuts.push_back(Reg);
      return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
    }
    case AMDGPUIntrinsic::R600_store_swizzle: {
      // Build an EXPORT node with the identity XYZW swizzle.
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, MVT::i32), // SWZ_X
        DAG.getConstant(1, MVT::i32), // SWZ_Y
        DAG.getConstant(2, MVT::i32), // SWZ_Z
        DAG.getConstant(3, MVT::i32) // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(),
                         Args, 8);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch(IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      // Shader inputs are pre-loaded registers: mark the register live-in
      // and copy from it at the function entry.
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      MRI.addLiveIn(Reg);
      return DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), Reg, VT);
    }

    case AMDGPUIntrinsic::R600_interp_input: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
      MachineSDNode *interp;
      if (ijb < 0) {
        // Negative I/J barycentric index: the input is a constant
        // (flat-interpolated) value loaded with INTERP_VEC_LOAD; extract the
        // channel for this slot.
        const MachineFunction &MF = DAG.getMachineFunction();
        const R600InstrInfo *TII =
          static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
        interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
            MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
        return DAG.getTargetExtractSubreg(
            TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
            DL, MVT::f32, SDValue(interp, 0));
      }

      // The I/J interpolation coefficients arrive in a pair of adjacent
      // pre-loaded registers (2*ijb and 2*ijb+1); mark them live-in.
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
      unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
      MRI.addLiveIn(RegisterI);
      MRI.addLiveIn(RegisterJ);
      SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
      SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);

      // XY and ZW channel pairs are produced by distinct interp opcodes;
      // the result index selects the channel within the pair.
      if (slot % 4 < 2)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            RegisterJNode, RegisterINode);
      return SDValue(interp, slot % 2);
    }
    case AMDGPUIntrinsic::R600_tex:
    case AMDGPUIntrinsic::R600_texc:
    case AMDGPUIntrinsic::R600_txl:
    case AMDGPUIntrinsic::R600_txlc:
    case AMDGPUIntrinsic::R600_txb:
    case AMDGPUIntrinsic::R600_txbc:
    case AMDGPUIntrinsic::R600_txf:
    case AMDGPUIntrinsic::R600_txq:
    case AMDGPUIntrinsic::R600_ddx:
    case AMDGPUIntrinsic::R600_ddy:
    case AMDGPUIntrinsic::R600_ldptr: {
      // All texture intrinsics lower to one TEXTURE_FETCH node whose first
      // operand encodes the operation kind.
      unsigned TextureOp;
      switch (IntrinsicID) {
      case AMDGPUIntrinsic::R600_tex:
        TextureOp = 0;
        break;
      case AMDGPUIntrinsic::R600_texc:
        TextureOp = 1;
        break;
      case AMDGPUIntrinsic::R600_txl:
        TextureOp = 2;
        break;
      case AMDGPUIntrinsic::R600_txlc:
        TextureOp = 3;
        break;
      case AMDGPUIntrinsic::R600_txb:
        TextureOp = 4;
        break;
      case AMDGPUIntrinsic::R600_txbc:
        TextureOp = 5;
        break;
      case AMDGPUIntrinsic::R600_txf:
        TextureOp = 6;
        break;
      case AMDGPUIntrinsic::R600_txq:
        TextureOp = 7;
        break;
      case AMDGPUIntrinsic::R600_ddx:
        TextureOp = 8;
        break;
      case AMDGPUIntrinsic::R600_ddy:
        TextureOp = 9;
        break;
      case AMDGPUIntrinsic::R600_ldptr:
        TextureOp = 10;
        break;
      default:
        llvm_unreachable("Unknow Texture Operation");
      }

      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
    }
    case AMDGPUIntrinsic::AMDGPU_dp4: {
      // Scalarize both v4f32 operands and feed the eight elements, channel
      // interleaved, to the DOT4 node.
      SDValue Args[8] = {
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(0, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(0, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(1, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(1, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(2, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(2, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(3, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(3, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8);
    }

    // Grid/group dimension queries read fixed implicit-parameter slots.
    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    // Workgroup ids live in T1 and workitem ids in T0 (one channel per axis).
    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}
742
743void R600TargetLowering::ReplaceNodeResults(SDNode *N,
744 SmallVectorImpl<SDValue> &Results,
745 SelectionDAG &DAG) const {
746 switch (N->getOpcode()) {
747 default: return;
748 case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
Tom Stellard365366f2013-01-23 02:09:06 +0000749 return;
750 case ISD::LOAD: {
751 SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
752 Results.push_back(SDValue(Node, 0));
753 Results.push_back(SDValue(Node, 1));
754 // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
755 // function
756 DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
757 return;
758 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000759 case ISD::STORE:
760 SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
761 Results.push_back(SDValue(Node, 0));
762 return;
Tom Stellard75aadc22012-12-11 21:25:42 +0000763 }
764}
765
/// Lower FSIN/FCOS to the R600-family hardware trig instructions.
///
/// The argument is first range-reduced: FRACT(x * 1/(2*Pi) + 0.5) - 0.5
/// maps x into [-0.5, 0.5], i.e. one period centered on zero.
SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
  // On hw >= R700, COS/SIN input must be between -1. and 1.
  // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
  EVT VT = Op.getValueType();
  SDValue Arg = Op.getOperand(0);
  // 0.15915494309 ~= 1 / (2 * Pi)
  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
      DAG.getNode(ISD::FADD, SDLoc(Op), VT,
        DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
          DAG.getConstantFP(0.15915494309, MVT::f32)),
        DAG.getConstantFP(0.5, MVT::f32)));
  unsigned TrigNode;
  switch (Op.getOpcode()) {
  case ISD::FCOS:
    TrigNode = AMDGPUISD::COS_HW;
    break;
  case ISD::FSIN:
    TrigNode = AMDGPUISD::SIN_HW;
    break;
  default:
    llvm_unreachable("Wrong trig opcode");
  }
  SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
      DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
        DAG.getConstantFP(-0.5, MVT::f32)));
  if (Gen >= AMDGPUSubtarget::R700)
    return TrigVal;
  // On R600 hw, COS/SIN input must be between -Pi and Pi.
  // NOTE(review): this multiplies the trig node's *result* by Pi rather than
  // rescaling its input -- presumably the pre-R700 SIN/COS lowering depends
  // on this exact form; verify against the R600 ISA documentation before
  // changing.
  return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
      DAG.getConstantFP(3.14159265359, MVT::f32));
}
796
Tom Stellard75aadc22012-12-11 21:25:42 +0000797SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
798 return DAG.getNode(
799 ISD::SETCC,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000800 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +0000801 MVT::i1,
802 Op, DAG.getConstantFP(0.0f, MVT::f32),
803 DAG.getCondCode(ISD::SETNE)
804 );
805}
806
Tom Stellard75aadc22012-12-11 21:25:42 +0000807SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000808 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +0000809 unsigned DwordOffset) const {
810 unsigned ByteOffset = DwordOffset * 4;
811 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +0000812 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +0000813
814 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
815 assert(isInt<16>(ByteOffset));
816
817 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
818 DAG.getConstant(ByteOffset, MVT::i32), // PTR
819 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
820 false, false, false, 0);
821}
822
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000823SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
824
825 MachineFunction &MF = DAG.getMachineFunction();
826 const AMDGPUFrameLowering *TFL =
827 static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
828
829 FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
830 assert(FIN);
831
832 unsigned FrameIndex = FIN->getIndex();
833 unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
834 return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
835}
836
Tom Stellard75aadc22012-12-11 21:25:42 +0000837bool R600TargetLowering::isZero(SDValue Op) const {
838 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
839 return Cst->isNullValue();
840 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
841 return CstFP->isZero();
842 } else {
843 return false;
844 }
845}
846
847SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +0000848 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000849 EVT VT = Op.getValueType();
850
851 SDValue LHS = Op.getOperand(0);
852 SDValue RHS = Op.getOperand(1);
853 SDValue True = Op.getOperand(2);
854 SDValue False = Op.getOperand(3);
855 SDValue CC = Op.getOperand(4);
856 SDValue Temp;
857
858 // LHS and RHS are guaranteed to be the same value type
859 EVT CompareVT = LHS.getValueType();
860
861 // Check if we can lower this to a native operation.
862
Tom Stellard2add82d2013-03-08 15:37:09 +0000863 // Try to lower to a SET* instruction:
864 //
865 // SET* can match the following patterns:
866 //
Tom Stellardcd428182013-09-28 02:50:38 +0000867 // select_cc f32, f32, -1, 0, cc_supported
868 // select_cc f32, f32, 1.0f, 0.0f, cc_supported
869 // select_cc i32, i32, -1, 0, cc_supported
Tom Stellard2add82d2013-03-08 15:37:09 +0000870 //
871
872 // Move hardware True/False values to the correct operand.
Tom Stellardcd428182013-09-28 02:50:38 +0000873 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
874 ISD::CondCode InverseCC =
875 ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
Tom Stellard5694d302013-09-28 02:50:43 +0000876 if (isHWTrueValue(False) && isHWFalseValue(True)) {
877 if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
878 std::swap(False, True);
879 CC = DAG.getCondCode(InverseCC);
880 } else {
881 ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
882 if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
883 std::swap(False, True);
884 std::swap(LHS, RHS);
885 CC = DAG.getCondCode(SwapInvCC);
886 }
887 }
Tom Stellard2add82d2013-03-08 15:37:09 +0000888 }
889
890 if (isHWTrueValue(True) && isHWFalseValue(False) &&
891 (CompareVT == VT || VT == MVT::i32)) {
892 // This can be matched by a SET* instruction.
893 return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
894 }
895
Tom Stellard75aadc22012-12-11 21:25:42 +0000896 // Try to lower to a CND* instruction:
Tom Stellard2add82d2013-03-08 15:37:09 +0000897 //
898 // CND* can match the following patterns:
899 //
Tom Stellardcd428182013-09-28 02:50:38 +0000900 // select_cc f32, 0.0, f32, f32, cc_supported
901 // select_cc f32, 0.0, i32, i32, cc_supported
902 // select_cc i32, 0, f32, f32, cc_supported
903 // select_cc i32, 0, i32, i32, cc_supported
Tom Stellard2add82d2013-03-08 15:37:09 +0000904 //
Tom Stellardcd428182013-09-28 02:50:38 +0000905
906 // Try to move the zero value to the RHS
907 if (isZero(LHS)) {
908 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
909 // Try swapping the operands
910 ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
911 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
912 std::swap(LHS, RHS);
913 CC = DAG.getCondCode(CCSwapped);
914 } else {
915 // Try inverting the conditon and then swapping the operands
916 ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
917 CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
918 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
919 std::swap(True, False);
920 std::swap(LHS, RHS);
921 CC = DAG.getCondCode(CCSwapped);
922 }
923 }
924 }
925 if (isZero(RHS)) {
926 SDValue Cond = LHS;
927 SDValue Zero = RHS;
Tom Stellard75aadc22012-12-11 21:25:42 +0000928 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
929 if (CompareVT != VT) {
930 // Bitcast True / False to the correct types. This will end up being
931 // a nop, but it allows us to define only a single pattern in the
932 // .TD files for each CND* instruction rather than having to have
933 // one pattern for integer True/False and one for fp True/False
934 True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
935 False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
936 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000937
938 switch (CCOpcode) {
939 case ISD::SETONE:
940 case ISD::SETUNE:
941 case ISD::SETNE:
Tom Stellard75aadc22012-12-11 21:25:42 +0000942 CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
943 Temp = True;
944 True = False;
945 False = Temp;
946 break;
947 default:
948 break;
949 }
950 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
951 Cond, Zero,
952 True, False,
953 DAG.getCondCode(CCOpcode));
954 return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
955 }
956
Tom Stellard75aadc22012-12-11 21:25:42 +0000957
958 // Possible Min/Max pattern
959 SDValue MinMax = LowerMinMax(Op, DAG);
960 if (MinMax.getNode()) {
961 return MinMax;
962 }
963
964 // If we make it this for it means we have no native instructions to handle
965 // this SELECT_CC, so we must lower it.
966 SDValue HWTrue, HWFalse;
967
968 if (CompareVT == MVT::f32) {
969 HWTrue = DAG.getConstantFP(1.0f, CompareVT);
970 HWFalse = DAG.getConstantFP(0.0f, CompareVT);
971 } else if (CompareVT == MVT::i32) {
972 HWTrue = DAG.getConstant(-1, CompareVT);
973 HWFalse = DAG.getConstant(0, CompareVT);
974 }
975 else {
976 assert(!"Unhandled value type in LowerSELECT_CC");
977 }
978
979 // Lower this unsupported SELECT_CC into a combination of two supported
980 // SELECT_CC operations.
981 SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
982
983 return DAG.getNode(ISD::SELECT_CC, DL, VT,
984 Cond, HWFalse,
985 True, False,
986 DAG.getCondCode(ISD::SETNE));
987}
988
/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
/// convert these pointers to a register index. Each register holds
/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
/// for indirect addressing.
SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
                                               unsigned StackWidth,
                                               SelectionDAG &DAG) const {
  // Shift amount that converts a byte address into a register index:
  // width 1 stores 4 bytes per slot (>> 2), width 2 stores 8 (>> 3),
  // width 4 uses the full 16-byte register (>> 4).
  unsigned SRLPad;
  switch(StackWidth) {
  case 1:
    SRLPad = 2;
    break;
  case 2:
    SRLPad = 3;
    break;
  case 4:
    SRLPad = 4;
    break;
  default: llvm_unreachable("Invalid stack width");
  }

  return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
                     DAG.getConstant(SRLPad, MVT::i32));
}
1014
/// Compute the sub-register channel (x/y/z/w) and pointer increment needed to
/// address element \p ElemIdx of a stack value, given that each stack slot
/// uses \p StackWidth of the register's 4 channels.
void R600TargetLowering::getStackAddress(unsigned StackWidth,
                                         unsigned ElemIdx,
                                         unsigned &Channel,
                                         unsigned &PtrIncr) const {
  switch (StackWidth) {
  default:
  case 1:
    // One element per register: always channel 0; advance the pointer for
    // every element after the first.
    Channel = 0;
    if (ElemIdx > 0) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 2:
    // Two elements per register: alternate channels 0/1; advance the pointer
    // when crossing into the second register (ElemIdx == 2).
    Channel = ElemIdx % 2;
    if (ElemIdx == 2) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 4:
    // All four elements fit in one register: channel is the element index.
    Channel = ElemIdx;
    PtrIncr = 0;
    break;
  }
}
1043
Tom Stellard75aadc22012-12-11 21:25:42 +00001044SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001045 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001046 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1047 SDValue Chain = Op.getOperand(0);
1048 SDValue Value = Op.getOperand(1);
1049 SDValue Ptr = Op.getOperand(2);
1050
Tom Stellard2ffc3302013-08-26 15:05:44 +00001051 SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
Tom Stellardfbab8272013-08-16 01:12:11 +00001052 if (Result.getNode()) {
1053 return Result;
1054 }
1055
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001056 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
1057 if (StoreNode->isTruncatingStore()) {
1058 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001059 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001060 EVT MemVT = StoreNode->getMemoryVT();
1061 SDValue MaskConstant;
1062 if (MemVT == MVT::i8) {
1063 MaskConstant = DAG.getConstant(0xFF, MVT::i32);
1064 } else {
1065 assert(MemVT == MVT::i16);
1066 MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
1067 }
1068 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
1069 DAG.getConstant(2, MVT::i32));
1070 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
1071 DAG.getConstant(0x00000003, VT));
1072 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1073 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
1074 DAG.getConstant(3, VT));
1075 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1076 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
1077 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1078 // vector instead.
1079 SDValue Src[4] = {
1080 ShiftedValue,
1081 DAG.getConstant(0, MVT::i32),
1082 DAG.getConstant(0, MVT::i32),
1083 Mask
1084 };
1085 SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src, 4);
1086 SDValue Args[3] = { Chain, Input, DWordAddr };
1087 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
1088 Op->getVTList(), Args, 3, MemVT,
1089 StoreNode->getMemOperand());
1090 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
1091 Value.getValueType().bitsGE(MVT::i32)) {
1092 // Convert pointer from byte address to dword address.
1093 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1094 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
1095 Ptr, DAG.getConstant(2, MVT::i32)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001096
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001097 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
1098 assert(!"Truncated and indexed stores not supported yet");
1099 } else {
1100 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1101 }
1102 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001103 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001104 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001105
1106 EVT ValueVT = Value.getValueType();
1107
1108 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1109 return SDValue();
1110 }
1111
1112 // Lowering for indirect addressing
1113
1114 const MachineFunction &MF = DAG.getMachineFunction();
1115 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1116 getTargetMachine().getFrameLowering());
1117 unsigned StackWidth = TFL->getStackWidth(MF);
1118
1119 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1120
1121 if (ValueVT.isVector()) {
1122 unsigned NumElemVT = ValueVT.getVectorNumElements();
1123 EVT ElemVT = ValueVT.getVectorElementType();
1124 SDValue Stores[4];
1125
1126 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1127 "vector width in load");
1128
1129 for (unsigned i = 0; i < NumElemVT; ++i) {
1130 unsigned Channel, PtrIncr;
1131 getStackAddress(StackWidth, i, Channel, PtrIncr);
1132 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1133 DAG.getConstant(PtrIncr, MVT::i32));
1134 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
1135 Value, DAG.getConstant(i, MVT::i32));
1136
1137 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1138 Chain, Elem, Ptr,
1139 DAG.getTargetConstant(Channel, MVT::i32));
1140 }
1141 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
1142 } else {
1143 if (ValueVT == MVT::i8) {
1144 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1145 }
1146 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
NAKAMURA Takumi18ca09c2013-05-22 06:37:25 +00001147 DAG.getTargetConstant(0, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001148 }
1149
1150 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001151}
1152
Tom Stellard365366f2013-01-23 02:09:06 +00001153// return (512 + (kc_bank << 12)
1154static int
1155ConstantAddressBlock(unsigned AddressSpace) {
1156 switch (AddressSpace) {
1157 case AMDGPUAS::CONSTANT_BUFFER_0:
1158 return 512;
1159 case AMDGPUAS::CONSTANT_BUFFER_1:
1160 return 512 + 4096;
1161 case AMDGPUAS::CONSTANT_BUFFER_2:
1162 return 512 + 4096 * 2;
1163 case AMDGPUAS::CONSTANT_BUFFER_3:
1164 return 512 + 4096 * 3;
1165 case AMDGPUAS::CONSTANT_BUFFER_4:
1166 return 512 + 4096 * 4;
1167 case AMDGPUAS::CONSTANT_BUFFER_5:
1168 return 512 + 4096 * 5;
1169 case AMDGPUAS::CONSTANT_BUFFER_6:
1170 return 512 + 4096 * 6;
1171 case AMDGPUAS::CONSTANT_BUFFER_7:
1172 return 512 + 4096 * 7;
1173 case AMDGPUAS::CONSTANT_BUFFER_8:
1174 return 512 + 4096 * 8;
1175 case AMDGPUAS::CONSTANT_BUFFER_9:
1176 return 512 + 4096 * 9;
1177 case AMDGPUAS::CONSTANT_BUFFER_10:
1178 return 512 + 4096 * 10;
1179 case AMDGPUAS::CONSTANT_BUFFER_11:
1180 return 512 + 4096 * 11;
1181 case AMDGPUAS::CONSTANT_BUFFER_12:
1182 return 512 + 4096 * 12;
1183 case AMDGPUAS::CONSTANT_BUFFER_13:
1184 return 512 + 4096 * 13;
1185 case AMDGPUAS::CONSTANT_BUFFER_14:
1186 return 512 + 4096 * 14;
1187 case AMDGPUAS::CONSTANT_BUFFER_15:
1188 return 512 + 4096 * 15;
1189 default:
1190 return -1;
1191 }
1192}
1193
/// Custom lowering for ISD::LOAD on R600: splits vector local loads, folds
/// constant-buffer loads into CONST_ADDRESS nodes, manually expands
/// sign-extending loads, and lowers private-address loads through the
/// indirect register stack.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  // Vector loads from local memory are split into scalar loads.
  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      SplitVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1) {
    SDValue Result;
    if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
        dyn_cast<Constant>(LoadNode->getSrcValue()) ||
        dyn_cast<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT, Slots, NumElements);
    } else {
      // non constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
          DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so
  // we need to manually expand loads that may be legal in some address spaces
  // and illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported
  // for compute shaders, since the data is sign extended when it is uploaded
  // to the buffer. However SEXT loads from other addresspaces are not
  // supported, so we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    SDValue ShiftAmount =
        DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                     LoadNode->getPointerInfo(), MemVT,
                                     LoadNode->isVolatile(),
                                     LoadNode->isNonTemporal(),
                                     LoadNode->getAlignment());
    // Sign extend by shifting the loaded value up, then arithmetically back
    // down.
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);

    SDValue MergedValues[2] = { Sra, Chain };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    // One REGISTER_LOAD per element; unused lanes are padded with UNDEF so a
    // full 4-wide vector can be rebuilt.
    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, MVT::i32),
                             Op.getOperand(2));
    }
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2];
  Ops[0] = LoweredLoad;
  Ops[1] = Chain;

  return DAG.getMergeValues(Ops, 2, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001328
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
///
/// Each formal argument is materialized as a load from constant buffer 0,
/// starting 36 bytes in (past the implicit thread-group/global-size
/// parameters).
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      SDLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());

  // Assign each incoming argument a memory offset within the input buffer.
  AnalyzeFormalArguments(CCInfo, Ins);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    EVT VT = VA.getLocVT();

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.
    SDValue Arg = DAG.getLoad(VT, DL, Chain,
                           DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
                           MachinePointerInfo(UndefValue::get(PtrTy)), false,
                           false, false, 4); // 4 is the prefered alignment for
                                             // the CONSTANT memory space.
    InVals.push_back(Arg);
  }
  return Chain;
}
1363
Matt Arsenault758659232013-05-18 00:21:46 +00001364EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Tom Stellard75aadc22012-12-11 21:25:42 +00001365 if (!VT.isVector()) return MVT::i32;
1366 return VT.changeVectorElementTypeToInteger();
1367}
1368
/// Fold constant 0.0/1.0 elements of a 4-element BUILD_VECTOR into the
/// hardware swizzle selects SEL_0/SEL_1 and collapse duplicated elements,
/// recording the old->new lane mapping in \p RemapSwizzle. Folded and
/// duplicate lanes become UNDEF so they can be dropped.
static SDValue
CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
                        DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };

  for (unsigned i = 0; i < 4; i++) {
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      continue;
    // Reuse an earlier identical lane rather than emitting it twice.
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
      VectorEntry.getValueType(), NewBldVec, 4);
}
1406
Benjamin Kramer193960c2013-06-11 13:32:25 +00001407static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1408 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001409 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1410 assert(RemapSwizzle.empty());
1411 SDValue NewBldVec[4] = {
1412 VectorEntry.getOperand(0),
1413 VectorEntry.getOperand(1),
1414 VectorEntry.getOperand(2),
1415 VectorEntry.getOperand(3)
1416 };
1417 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001418 for (unsigned i = 0; i < 4; i++)
1419 RemapSwizzle[i] = i;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001420
1421 for (unsigned i = 0; i < 4; i++) {
1422 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1423 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1424 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001425 if (i == Idx) {
1426 isUnmovable[Idx] = true;
1427 continue;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001428 }
Vincent Lejeune301beb82013-10-13 17:56:04 +00001429 if (isUnmovable[Idx])
1430 continue;
1431 // Swap i and Idx
1432 std::swap(NewBldVec[Idx], NewBldVec[i]);
1433 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1434 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001435 }
1436 }
1437
1438 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
1439 VectorEntry.getValueType(), NewBldVec, 4);
1440}
1441
1442
1443SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1444SDValue Swz[4], SelectionDAG &DAG) const {
1445 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1446 // Old -> New swizzle values
1447 DenseMap<unsigned, unsigned> SwizzleRemap;
1448
1449 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1450 for (unsigned i = 0; i < 4; i++) {
1451 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1452 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1453 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1454 }
1455
1456 SwizzleRemap.clear();
1457 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1458 for (unsigned i = 0; i < 4; i++) {
1459 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1460 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1461 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1462 }
1463
1464 return BuildVector;
1465}
1466
1467
Tom Stellard75aadc22012-12-11 21:25:42 +00001468//===----------------------------------------------------------------------===//
1469// Custom DAG Optimizations
1470//===----------------------------------------------------------------------===//
1471
1472SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1473 DAGCombinerInfo &DCI) const {
1474 SelectionDAG &DAG = DCI.DAG;
1475
1476 switch (N->getOpcode()) {
1477 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1478 case ISD::FP_ROUND: {
1479 SDValue Arg = N->getOperand(0);
1480 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001481 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001482 Arg.getOperand(0));
1483 }
1484 break;
1485 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001486
1487 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1488 // (i32 select_cc f32, f32, -1, 0 cc)
1489 //
1490 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1491 // this to one of the SET*_DX10 instructions.
1492 case ISD::FP_TO_SINT: {
1493 SDValue FNeg = N->getOperand(0);
1494 if (FNeg.getOpcode() != ISD::FNEG) {
1495 return SDValue();
1496 }
1497 SDValue SelectCC = FNeg.getOperand(0);
1498 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1499 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1500 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1501 !isHWTrueValue(SelectCC.getOperand(2)) ||
1502 !isHWFalseValue(SelectCC.getOperand(3))) {
1503 return SDValue();
1504 }
1505
Andrew Trickef9de2a2013-05-25 02:42:55 +00001506 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001507 SelectCC.getOperand(0), // LHS
1508 SelectCC.getOperand(1), // RHS
1509 DAG.getConstant(-1, MVT::i32), // True
1510 DAG.getConstant(0, MVT::i32), // Flase
1511 SelectCC.getOperand(4)); // CC
1512
1513 break;
1514 }
Quentin Colombete2e05482013-07-30 00:27:16 +00001515
1516 // insert_vector_elt (build_vector elt0, …, eltN), NewEltIdx, idx
1517 // => build_vector elt0, …, NewEltIdx, …, eltN
1518 case ISD::INSERT_VECTOR_ELT: {
1519 SDValue InVec = N->getOperand(0);
1520 SDValue InVal = N->getOperand(1);
1521 SDValue EltNo = N->getOperand(2);
1522 SDLoc dl(N);
1523
1524 // If the inserted element is an UNDEF, just use the input vector.
1525 if (InVal.getOpcode() == ISD::UNDEF)
1526 return InVec;
1527
1528 EVT VT = InVec.getValueType();
1529
1530 // If we can't generate a legal BUILD_VECTOR, exit
1531 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1532 return SDValue();
1533
1534 // Check that we know which element is being inserted
1535 if (!isa<ConstantSDNode>(EltNo))
1536 return SDValue();
1537 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1538
1539 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1540 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1541 // vector elements.
1542 SmallVector<SDValue, 8> Ops;
1543 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1544 Ops.append(InVec.getNode()->op_begin(),
1545 InVec.getNode()->op_end());
1546 } else if (InVec.getOpcode() == ISD::UNDEF) {
1547 unsigned NElts = VT.getVectorNumElements();
1548 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1549 } else {
1550 return SDValue();
1551 }
1552
1553 // Insert the element
1554 if (Elt < Ops.size()) {
1555 // All the operands of BUILD_VECTOR must have the same type;
1556 // we enforce that here.
1557 EVT OpVT = Ops[0].getValueType();
1558 if (InVal.getValueType() != OpVT)
1559 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1560 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
1561 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
1562 Ops[Elt] = InVal;
1563 }
1564
1565 // Return the new vector
1566 return DAG.getNode(ISD::BUILD_VECTOR, dl,
1567 VT, &Ops[0], Ops.size());
1568 }
1569
Tom Stellard365366f2013-01-23 02:09:06 +00001570 // Extract_vec (Build_vector) generated by custom lowering
1571 // also needs to be customly combined
1572 case ISD::EXTRACT_VECTOR_ELT: {
1573 SDValue Arg = N->getOperand(0);
1574 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1575 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1576 unsigned Element = Const->getZExtValue();
1577 return Arg->getOperand(Element);
1578 }
1579 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001580 if (Arg.getOpcode() == ISD::BITCAST &&
1581 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1582 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1583 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001584 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00001585 Arg->getOperand(0).getOperand(Element));
1586 }
1587 }
Tom Stellard365366f2013-01-23 02:09:06 +00001588 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001589
1590 case ISD::SELECT_CC: {
1591 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1592 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001593 //
1594 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1595 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001596 SDValue LHS = N->getOperand(0);
1597 if (LHS.getOpcode() != ISD::SELECT_CC) {
1598 return SDValue();
1599 }
1600
1601 SDValue RHS = N->getOperand(1);
1602 SDValue True = N->getOperand(2);
1603 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001604 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001605
1606 if (LHS.getOperand(2).getNode() != True.getNode() ||
1607 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00001608 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00001609 return SDValue();
1610 }
1611
Tom Stellard5e524892013-03-08 15:37:11 +00001612 switch (NCC) {
1613 default: return SDValue();
1614 case ISD::SETNE: return LHS;
1615 case ISD::SETEQ: {
1616 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1617 LHSCC = ISD::getSetCCInverse(LHSCC,
1618 LHS.getOperand(0).getValueType().isInteger());
Tom Stellardcd428182013-09-28 02:50:38 +00001619 if (DCI.isBeforeLegalizeOps() ||
1620 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1621 return DAG.getSelectCC(SDLoc(N),
1622 LHS.getOperand(0),
1623 LHS.getOperand(1),
1624 LHS.getOperand(2),
1625 LHS.getOperand(3),
1626 LHSCC);
1627 break;
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001628 }
Tom Stellard5e524892013-03-08 15:37:11 +00001629 }
Tom Stellardcd428182013-09-28 02:50:38 +00001630 return SDValue();
Tom Stellard5e524892013-03-08 15:37:11 +00001631 }
Tom Stellardfbab8272013-08-16 01:12:11 +00001632
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001633 case AMDGPUISD::EXPORT: {
1634 SDValue Arg = N->getOperand(1);
1635 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1636 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001637
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001638 SDValue NewArgs[8] = {
1639 N->getOperand(0), // Chain
1640 SDValue(),
1641 N->getOperand(2), // ArrayBase
1642 N->getOperand(3), // Type
1643 N->getOperand(4), // SWZ_X
1644 N->getOperand(5), // SWZ_Y
1645 N->getOperand(6), // SWZ_Z
1646 N->getOperand(7) // SWZ_W
1647 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00001648 SDLoc DL(N);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001649 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001650 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
Tom Stellarde06163a2013-02-07 14:02:35 +00001651 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001652 case AMDGPUISD::TEXTURE_FETCH: {
1653 SDValue Arg = N->getOperand(1);
1654 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1655 break;
1656
1657 SDValue NewArgs[19] = {
1658 N->getOperand(0),
1659 N->getOperand(1),
1660 N->getOperand(2),
1661 N->getOperand(3),
1662 N->getOperand(4),
1663 N->getOperand(5),
1664 N->getOperand(6),
1665 N->getOperand(7),
1666 N->getOperand(8),
1667 N->getOperand(9),
1668 N->getOperand(10),
1669 N->getOperand(11),
1670 N->getOperand(12),
1671 N->getOperand(13),
1672 N->getOperand(14),
1673 N->getOperand(15),
1674 N->getOperand(16),
1675 N->getOperand(17),
1676 N->getOperand(18),
1677 };
1678 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
1679 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
1680 NewArgs, 19);
1681 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001682 }
1683 return SDValue();
1684}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001685
1686static bool
1687FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001688 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001689 const R600InstrInfo *TII =
1690 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
1691 if (!Src.isMachineOpcode())
1692 return false;
1693 switch (Src.getMachineOpcode()) {
1694 case AMDGPU::FNEG_R600:
1695 if (!Neg.getNode())
1696 return false;
1697 Src = Src.getOperand(0);
1698 Neg = DAG.getTargetConstant(1, MVT::i32);
1699 return true;
1700 case AMDGPU::FABS_R600:
1701 if (!Abs.getNode())
1702 return false;
1703 Src = Src.getOperand(0);
1704 Abs = DAG.getTargetConstant(1, MVT::i32);
1705 return true;
1706 case AMDGPU::CONST_COPY: {
1707 unsigned Opcode = ParentNode->getMachineOpcode();
1708 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1709
1710 if (!Sel.getNode())
1711 return false;
1712
1713 SDValue CstOffset = Src.getOperand(0);
1714 if (ParentNode->getValueType(0).isVector())
1715 return false;
1716
1717 // Gather constants values
1718 int SrcIndices[] = {
1719 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
1720 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
1721 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
1722 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1723 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1724 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1725 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
1726 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
1727 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
1728 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
1729 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
1730 };
1731 std::vector<unsigned> Consts;
1732 for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
1733 int OtherSrcIdx = SrcIndices[i];
1734 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
1735 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
1736 continue;
1737 if (HasDst) {
1738 OtherSrcIdx--;
1739 OtherSelIdx--;
1740 }
1741 if (RegisterSDNode *Reg =
1742 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
1743 if (Reg->getReg() == AMDGPU::ALU_CONST) {
1744 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(
1745 ParentNode->getOperand(OtherSelIdx));
1746 Consts.push_back(Cst->getZExtValue());
1747 }
1748 }
1749 }
1750
1751 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
1752 Consts.push_back(Cst->getZExtValue());
1753 if (!TII->fitsConstReadLimitations(Consts)) {
1754 return false;
1755 }
1756
1757 Sel = CstOffset;
1758 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
1759 return true;
1760 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001761 case AMDGPU::MOV_IMM_I32:
1762 case AMDGPU::MOV_IMM_F32: {
1763 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
1764 uint64_t ImmValue = 0;
1765
1766
1767 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
1768 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
1769 float FloatValue = FPC->getValueAPF().convertToFloat();
1770 if (FloatValue == 0.0) {
1771 ImmReg = AMDGPU::ZERO;
1772 } else if (FloatValue == 0.5) {
1773 ImmReg = AMDGPU::HALF;
1774 } else if (FloatValue == 1.0) {
1775 ImmReg = AMDGPU::ONE;
1776 } else {
1777 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
1778 }
1779 } else {
1780 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
1781 uint64_t Value = C->getZExtValue();
1782 if (Value == 0) {
1783 ImmReg = AMDGPU::ZERO;
1784 } else if (Value == 1) {
1785 ImmReg = AMDGPU::ONE_INT;
1786 } else {
1787 ImmValue = Value;
1788 }
1789 }
1790
1791 // Check that we aren't already using an immediate.
1792 // XXX: It's possible for an instruction to have more than one
1793 // immediate operand, but this is not supported yet.
1794 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
1795 if (!Imm.getNode())
1796 return false;
1797 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
1798 assert(C);
1799 if (C->getZExtValue())
1800 return false;
1801 Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
1802 }
1803 Src = DAG.getRegister(ImmReg, MVT::i32);
1804 return true;
1805 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001806 default:
1807 return false;
1808 }
1809}
1810
1811
1812/// \brief Fold the instructions after selecting them
1813SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
1814 SelectionDAG &DAG) const {
1815 const R600InstrInfo *TII =
1816 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
1817 if (!Node->isMachineOpcode())
1818 return Node;
1819 unsigned Opcode = Node->getMachineOpcode();
1820 SDValue FakeOp;
1821
1822 std::vector<SDValue> Ops;
1823 for(SDNode::op_iterator I = Node->op_begin(), E = Node->op_end();
1824 I != E; ++I)
1825 Ops.push_back(*I);
1826
1827 if (Opcode == AMDGPU::DOT_4) {
1828 int OperandIdx[] = {
1829 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1830 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1831 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1832 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
1833 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
1834 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
1835 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
1836 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
1837 };
1838 int NegIdx[] = {
1839 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
1840 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
1841 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
1842 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
1843 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
1844 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
1845 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
1846 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
1847 };
1848 int AbsIdx[] = {
1849 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
1850 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
1851 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
1852 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
1853 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
1854 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
1855 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
1856 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
1857 };
1858 for (unsigned i = 0; i < 8; i++) {
1859 if (OperandIdx[i] < 0)
1860 return Node;
1861 SDValue &Src = Ops[OperandIdx[i] - 1];
1862 SDValue &Neg = Ops[NegIdx[i] - 1];
1863 SDValue &Abs = Ops[AbsIdx[i] - 1];
1864 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1865 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
1866 if (HasDst)
1867 SelIdx--;
1868 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001869 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
1870 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1871 }
1872 } else if (Opcode == AMDGPU::REG_SEQUENCE) {
1873 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
1874 SDValue &Src = Ops[i];
1875 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001876 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1877 }
Vincent Lejeune0167a312013-09-12 23:45:00 +00001878 } else if (Opcode == AMDGPU::CLAMP_R600) {
1879 SDValue Src = Node->getOperand(0);
1880 if (!Src.isMachineOpcode() ||
1881 !TII->hasInstrModifiers(Src.getMachineOpcode()))
1882 return Node;
1883 int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
1884 AMDGPU::OpName::clamp);
1885 if (ClampIdx < 0)
1886 return Node;
1887 std::vector<SDValue> Ops;
1888 unsigned NumOp = Src.getNumOperands();
1889 for(unsigned i = 0; i < NumOp; ++i)
1890 Ops.push_back(Src.getOperand(i));
1891 Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
1892 return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
1893 Node->getVTList(), Ops);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001894 } else {
1895 if (!TII->hasInstrModifiers(Opcode))
1896 return Node;
1897 int OperandIdx[] = {
1898 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
1899 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
1900 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
1901 };
1902 int NegIdx[] = {
1903 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
1904 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
1905 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
1906 };
1907 int AbsIdx[] = {
1908 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
1909 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
1910 -1
1911 };
1912 for (unsigned i = 0; i < 3; i++) {
1913 if (OperandIdx[i] < 0)
1914 return Node;
1915 SDValue &Src = Ops[OperandIdx[i] - 1];
1916 SDValue &Neg = Ops[NegIdx[i] - 1];
1917 SDValue FakeAbs;
1918 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
1919 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1920 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001921 int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
1922 if (HasDst) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001923 SelIdx--;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001924 ImmIdx--;
1925 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001926 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001927 SDValue &Imm = Ops[ImmIdx];
1928 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001929 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1930 }
1931 }
1932
1933 return Node;
1934}