blob: 5bb81296772618b0222d14453106184996103fc2 [file] [log] [blame]
Tom Stellard75aadc22012-12-11 21:25:42 +00001//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Custom DAG lowering for R600
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
16#include "R600Defines.h"
17#include "R600InstrInfo.h"
18#include "R600MachineFunctionInfo.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000019#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000020#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000021#include "llvm/CodeGen/MachineInstrBuilder.h"
22#include "llvm/CodeGen/MachineRegisterInfo.h"
23#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000024#include "llvm/IR/Argument.h"
25#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000026
27using namespace llvm;
28
29R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000030 AMDGPUTargetLowering(TM),
31 Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000032 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
33 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
34 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
35 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000036 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
37 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
38
Tom Stellard75aadc22012-12-11 21:25:42 +000039 computeRegisterProperties();
40
Tom Stellard0351ea22013-09-28 02:50:50 +000041 // Set condition code actions
42 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
43 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000044 setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000045 setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000046 setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
47 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000048 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
49 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
50 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
51 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000052 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
53 setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
54
55 setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
56 setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
57 setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
58 setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
59
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000060 setOperationAction(ISD::FCOS, MVT::f32, Custom);
61 setOperationAction(ISD::FSIN, MVT::f32, Custom);
62
Tom Stellard75aadc22012-12-11 21:25:42 +000063 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000064 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000065
Tom Stellard492ebea2013-03-08 15:37:07 +000066 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
67 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000068
69 setOperationAction(ISD::FSUB, MVT::f32, Expand);
70
71 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
72 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
73 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000074
Tom Stellard75aadc22012-12-11 21:25:42 +000075 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
76 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
77
Tom Stellarde8f9f282013-03-08 15:37:05 +000078 setOperationAction(ISD::SETCC, MVT::i32, Expand);
79 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000080 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
81
Tom Stellard53f2f902013-09-05 18:38:03 +000082 setOperationAction(ISD::SELECT, MVT::i32, Expand);
83 setOperationAction(ISD::SELECT, MVT::f32, Expand);
84 setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
85 setOperationAction(ISD::SELECT, MVT::v2f32, Expand);
86 setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
87 setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000088
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000089 // Legalize loads and stores to the private address space.
90 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +000091 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000092 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellard1e803092013-07-23 01:48:18 +000093 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
94 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
95 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
96 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000097 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000098 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +000099 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000100 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
Tom Stellardd3ee8c12013-08-16 01:12:06 +0000101 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
102 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000103
Tom Stellard365366f2013-01-23 02:09:06 +0000104 setOperationAction(ISD::LOAD, MVT::i32, Custom);
105 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000106 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
107
Tom Stellard75aadc22012-12-11 21:25:42 +0000108 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +0000109 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +0000110 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +0000111 setTargetDAGCombine(ISD::SELECT_CC);
Quentin Colombete2e05482013-07-30 00:27:16 +0000112 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000113
Michel Danzer49812b52013-07-10 16:37:07 +0000114 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
115
Tom Stellardb852af52013-03-08 15:37:03 +0000116 setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard87047f62013-04-24 23:56:18 +0000117 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
Tom Stellardfc455472013-08-12 22:33:21 +0000118 setSchedulingPreference(Sched::Source);
Tom Stellard75aadc22012-12-11 21:25:42 +0000119}
120
/// \brief Expand pseudo instructions that were selected with the
/// usesCustomInserter flag into real machine instructions.
///
/// The pseudo \p MI is rewritten in place inside \p BB. In most cases the
/// pseudo is erased at the bottom of the function and the block is returned;
/// the early-return paths (unmerged exports, RETURN) keep \p MI alive.
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // LDS instructions that define a result implicitly write it to the OQAP
    // queue register. If the dst vreg is actually used, re-emit the LDS op
    // defining OQAP and copy OQAP into the original vreg; if it is dead, use
    // the no-return form of the op instead.
    if (TII->isLDSInstr(MI->getOpcode()) &&
        TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst) != -1) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg())) {
        NewMI = BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()),
                        AMDGPU::OQAP);
        TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV,
                                     MI->getOperand(0).getReg(),
                                     AMDGPU::OQAP);
      } else {
        NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                        TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      }
      // Copy the remaining (non-dst) operands onto the rebuilt instruction.
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      // Not an LDS pseudo: let the generic AMDGPU inserter handle it.
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;

  // CLAMP/FABS/FNEG pseudos become a MOV carrying the matching R600
  // source/destination modifier flag.
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    // MASK_WRITE does not emit code itself: it tags the instruction that
    // defines its input register with the write-mask flag.
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    // FP immediates are moved as their raw bit pattern.
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    // Read from the constant cache: a MOV from ALU_CONST with the constant
    // slot selected via the src0_sel immediate operand.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
                       MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    // If the next instruction is a RETURN, fold the end-of-program bit into
    // this write.
    unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    // Texture fetch with explicit derivatives: load the H and V gradients
    // into temporaries, then issue the gradient sample which uses them
    // implicitly.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    // Source swizzle (X,Y,Z,W) and per-channel coordinate-type flags,
    // adjusted below according to the texture target.
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
        .addOperand(MI->getOperand(3))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
        .addOperand(MI->getOperand(2))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW)
        // Implicit uses keep the gradient-setup instructions alive and
        // ordered before the sample.
        .addReg(T0, RegState::Implicit)
        .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    // Same expansion as TXD but ends in the shadow-compare gradient sample
    // (TEX_SAMPLE_C_G).
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
        .addOperand(MI->getOperand(3))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
        .addOperand(MI->getOperand(2))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW)
        .addReg(T0, RegState::Implicit)
        .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
        .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    // Conditional branch: set PREDICATE_BIT with a PRED_X compare against
    // zero, then emit the predicated jump.
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .addOperand(MI->getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    // Same as the f32 form, but using the integer compare opcode.
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO_INT)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .addOperand(MI->getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if its not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = llvm::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
    // Keep the pseudo unexpanded (early return, MI not erased) unless it is
    // the final export of its type or ends the program.
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // CF instruction encoding differs between Evergreen (84) and R600 (40).
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addOperand(MI->getOperand(2))
        .addOperand(MI->getOperand(3))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5))
        .addOperand(MI->getOperand(6))
        .addImm(CfInst)
        .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  // Common exit: the pseudo has been fully expanded, drop it.
  MI->eraseFromParent();
  return BB;
}
501
502//===----------------------------------------------------------------------===//
503// Custom DAG Lowering Operations
504//===----------------------------------------------------------------------===//
505
/// \brief Custom-lower the SDNodes this target registered as Custom in the
/// constructor, plus the target intrinsics.
///
/// Returns the replacement SDValue, or an empty SDValue() when an intrinsic
/// case only needed to fall through (no replacement).
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::FCOS:
  case ISD::FSIN: return LowerTrig(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: return LowerLOAD(Op, DAG);
  case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::INTRINSIC_VOID: {
    // Operand 0 is the chain, operand 1 the intrinsic ID node.
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      // Record the output register as a live-out (consumed by the RETURN
      // expansion) and copy the value into it.
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MFI->LiveOuts.push_back(Reg);
      return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
    }
    case AMDGPUIntrinsic::R600_store_swizzle: {
      // Build an EXPORT node with the identity swizzle (X,Y,Z,W).
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, MVT::i32), // SWZ_X
        DAG.getConstant(1, MVT::i32), // SWZ_Y
        DAG.getConstant(2, MVT::i32), // SWZ_Z
        DAG.getConstant(3, MVT::i32)  // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(),
                         Args, 8);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch(IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      // Shader inputs arrive pre-loaded in T-registers: mark the register
      // live-in and read it at the function entry.
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      MRI.addLiveIn(Reg);
      return DAG.getCopyFromReg(DAG.getEntryNode(),
                                SDLoc(DAG.getEntryNode()), Reg, VT);
    }

    case AMDGPUIntrinsic::R600_interp_input: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
      MachineSDNode *interp;
      if (ijb < 0) {
        // Negative i/j base: constant (flat) interpolation, loaded as a
        // vector and extracted per channel.
        const MachineFunction &MF = DAG.getMachineFunction();
        const R600InstrInfo *TII =
            static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
        interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
            MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
        return DAG.getTargetExtractSubreg(
            TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
            DL, MVT::f32, SDValue(interp, 0));
      }
      // The i/j barycentrics live in a fixed pair of live-in T-registers.
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
      unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
      MRI.addLiveIn(RegisterI);
      MRI.addLiveIn(RegisterJ);
      SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
      SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);

      // XY pairs come from INTERP_PAIR_XY, ZW pairs from INTERP_PAIR_ZW;
      // each node produces two f32 results, picked by slot parity.
      if (slot % 4 < 2)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            RegisterJNode, RegisterINode);
      return SDValue(interp, slot % 2);
    }
    case AMDGPUIntrinsic::R600_interp_xy:
    case AMDGPUIntrinsic::R600_interp_zw: {
      // Interpolate a pair of channels with caller-provided i/j values and
      // return both results packed into a v2f32.
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      MachineSDNode *interp;
      SDValue RegisterINode = Op.getOperand(2);
      SDValue RegisterJNode = Op.getOperand(3);

      if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
            RegisterJNode, RegisterINode);
      return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
          SDValue(interp, 0), SDValue(interp, 1));
    }
    case AMDGPUIntrinsic::R600_tex:
    case AMDGPUIntrinsic::R600_texc:
    case AMDGPUIntrinsic::R600_txl:
    case AMDGPUIntrinsic::R600_txlc:
    case AMDGPUIntrinsic::R600_txb:
    case AMDGPUIntrinsic::R600_txbc:
    case AMDGPUIntrinsic::R600_txf:
    case AMDGPUIntrinsic::R600_txq:
    case AMDGPUIntrinsic::R600_ddx:
    case AMDGPUIntrinsic::R600_ddy:
    case AMDGPUIntrinsic::R600_ldptr: {
      // Map each texture intrinsic to its TEXTURE_FETCH opcode selector.
      unsigned TextureOp;
      switch (IntrinsicID) {
      case AMDGPUIntrinsic::R600_tex:
        TextureOp = 0;
        break;
      case AMDGPUIntrinsic::R600_texc:
        TextureOp = 1;
        break;
      case AMDGPUIntrinsic::R600_txl:
        TextureOp = 2;
        break;
      case AMDGPUIntrinsic::R600_txlc:
        TextureOp = 3;
        break;
      case AMDGPUIntrinsic::R600_txb:
        TextureOp = 4;
        break;
      case AMDGPUIntrinsic::R600_txbc:
        TextureOp = 5;
        break;
      case AMDGPUIntrinsic::R600_txf:
        TextureOp = 6;
        break;
      case AMDGPUIntrinsic::R600_txq:
        TextureOp = 7;
        break;
      case AMDGPUIntrinsic::R600_ddx:
        TextureOp = 8;
        break;
      case AMDGPUIntrinsic::R600_ddy:
        TextureOp = 9;
        break;
      case AMDGPUIntrinsic::R600_ldptr:
        TextureOp = 10;
        break;
      default:
        llvm_unreachable("Unknow Texture Operation");
      }

      // TEXTURE_FETCH operands: op selector, coord, dst/src swizzles,
      // resource/sampler ids, offsets and coordinate types.
      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
    }
    case AMDGPUIntrinsic::AMDGPU_dp4: {
      // Four-component dot product: interleave the extracted lanes of both
      // vector operands as DOT4's eight scalar inputs.
      SDValue Args[8] = {
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(0, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(0, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(1, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(1, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(2, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(2, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(3, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(3, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8);
    }

    // ngroups/global_size/local_size live in the implicit-parameter buffer
    // at fixed dword indices.
    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    // Group ids are pre-loaded in T1, thread ids in T0.
    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}
759
760void R600TargetLowering::ReplaceNodeResults(SDNode *N,
761 SmallVectorImpl<SDValue> &Results,
762 SelectionDAG &DAG) const {
763 switch (N->getOpcode()) {
764 default: return;
765 case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
Tom Stellard365366f2013-01-23 02:09:06 +0000766 return;
767 case ISD::LOAD: {
768 SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
769 Results.push_back(SDValue(Node, 0));
770 Results.push_back(SDValue(Node, 1));
771 // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
772 // function
773 DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
774 return;
775 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000776 case ISD::STORE:
777 SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
778 Results.push_back(SDValue(Node, 0));
779 return;
Tom Stellard75aadc22012-12-11 21:25:42 +0000780 }
781}
782
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000783SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
784 // On hw >= R700, COS/SIN input must be between -1. and 1.
785 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
786 EVT VT = Op.getValueType();
787 SDValue Arg = Op.getOperand(0);
788 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
789 DAG.getNode(ISD::FADD, SDLoc(Op), VT,
790 DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
791 DAG.getConstantFP(0.15915494309, MVT::f32)),
792 DAG.getConstantFP(0.5, MVT::f32)));
793 unsigned TrigNode;
794 switch (Op.getOpcode()) {
795 case ISD::FCOS:
796 TrigNode = AMDGPUISD::COS_HW;
797 break;
798 case ISD::FSIN:
799 TrigNode = AMDGPUISD::SIN_HW;
800 break;
801 default:
802 llvm_unreachable("Wrong trig opcode");
803 }
804 SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
805 DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
806 DAG.getConstantFP(-0.5, MVT::f32)));
807 if (Gen >= AMDGPUSubtarget::R700)
808 return TrigVal;
809 // On R600 hw, COS/SIN input must be between -Pi and Pi.
810 return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
811 DAG.getConstantFP(3.14159265359, MVT::f32));
812}
813
Tom Stellard75aadc22012-12-11 21:25:42 +0000814SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
815 return DAG.getNode(
816 ISD::SETCC,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000817 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +0000818 MVT::i1,
819 Op, DAG.getConstantFP(0.0f, MVT::f32),
820 DAG.getCondCode(ISD::SETNE)
821 );
822}
823
Tom Stellard75aadc22012-12-11 21:25:42 +0000824SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000825 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +0000826 unsigned DwordOffset) const {
827 unsigned ByteOffset = DwordOffset * 4;
828 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +0000829 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +0000830
831 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
832 assert(isInt<16>(ByteOffset));
833
834 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
835 DAG.getConstant(ByteOffset, MVT::i32), // PTR
836 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
837 false, false, false, 0);
838}
839
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000840SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
841
842 MachineFunction &MF = DAG.getMachineFunction();
843 const AMDGPUFrameLowering *TFL =
844 static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
845
846 FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
847 assert(FIN);
848
849 unsigned FrameIndex = FIN->getIndex();
850 unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
851 return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
852}
853
Tom Stellard75aadc22012-12-11 21:25:42 +0000854bool R600TargetLowering::isZero(SDValue Op) const {
855 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
856 return Cst->isNullValue();
857 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
858 return CstFP->isZero();
859 } else {
860 return false;
861 }
862}
863
864SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +0000865 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000866 EVT VT = Op.getValueType();
867
868 SDValue LHS = Op.getOperand(0);
869 SDValue RHS = Op.getOperand(1);
870 SDValue True = Op.getOperand(2);
871 SDValue False = Op.getOperand(3);
872 SDValue CC = Op.getOperand(4);
873 SDValue Temp;
874
875 // LHS and RHS are guaranteed to be the same value type
876 EVT CompareVT = LHS.getValueType();
877
878 // Check if we can lower this to a native operation.
879
Tom Stellard2add82d2013-03-08 15:37:09 +0000880 // Try to lower to a SET* instruction:
881 //
882 // SET* can match the following patterns:
883 //
Tom Stellardcd428182013-09-28 02:50:38 +0000884 // select_cc f32, f32, -1, 0, cc_supported
885 // select_cc f32, f32, 1.0f, 0.0f, cc_supported
886 // select_cc i32, i32, -1, 0, cc_supported
Tom Stellard2add82d2013-03-08 15:37:09 +0000887 //
888
889 // Move hardware True/False values to the correct operand.
Tom Stellardcd428182013-09-28 02:50:38 +0000890 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
891 ISD::CondCode InverseCC =
892 ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
Tom Stellard5694d302013-09-28 02:50:43 +0000893 if (isHWTrueValue(False) && isHWFalseValue(True)) {
894 if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
895 std::swap(False, True);
896 CC = DAG.getCondCode(InverseCC);
897 } else {
898 ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
899 if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
900 std::swap(False, True);
901 std::swap(LHS, RHS);
902 CC = DAG.getCondCode(SwapInvCC);
903 }
904 }
Tom Stellard2add82d2013-03-08 15:37:09 +0000905 }
906
907 if (isHWTrueValue(True) && isHWFalseValue(False) &&
908 (CompareVT == VT || VT == MVT::i32)) {
909 // This can be matched by a SET* instruction.
910 return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
911 }
912
Tom Stellard75aadc22012-12-11 21:25:42 +0000913 // Try to lower to a CND* instruction:
Tom Stellard2add82d2013-03-08 15:37:09 +0000914 //
915 // CND* can match the following patterns:
916 //
Tom Stellardcd428182013-09-28 02:50:38 +0000917 // select_cc f32, 0.0, f32, f32, cc_supported
918 // select_cc f32, 0.0, i32, i32, cc_supported
919 // select_cc i32, 0, f32, f32, cc_supported
920 // select_cc i32, 0, i32, i32, cc_supported
Tom Stellard2add82d2013-03-08 15:37:09 +0000921 //
Tom Stellardcd428182013-09-28 02:50:38 +0000922
923 // Try to move the zero value to the RHS
924 if (isZero(LHS)) {
925 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
926 // Try swapping the operands
927 ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
928 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
929 std::swap(LHS, RHS);
930 CC = DAG.getCondCode(CCSwapped);
931 } else {
932 // Try inverting the conditon and then swapping the operands
933 ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
934 CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
935 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
936 std::swap(True, False);
937 std::swap(LHS, RHS);
938 CC = DAG.getCondCode(CCSwapped);
939 }
940 }
941 }
942 if (isZero(RHS)) {
943 SDValue Cond = LHS;
944 SDValue Zero = RHS;
Tom Stellard75aadc22012-12-11 21:25:42 +0000945 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
946 if (CompareVT != VT) {
947 // Bitcast True / False to the correct types. This will end up being
948 // a nop, but it allows us to define only a single pattern in the
949 // .TD files for each CND* instruction rather than having to have
950 // one pattern for integer True/False and one for fp True/False
951 True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
952 False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
953 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000954
955 switch (CCOpcode) {
956 case ISD::SETONE:
957 case ISD::SETUNE:
958 case ISD::SETNE:
Tom Stellard75aadc22012-12-11 21:25:42 +0000959 CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
960 Temp = True;
961 True = False;
962 False = Temp;
963 break;
964 default:
965 break;
966 }
967 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
968 Cond, Zero,
969 True, False,
970 DAG.getCondCode(CCOpcode));
971 return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
972 }
973
Tom Stellard75aadc22012-12-11 21:25:42 +0000974
975 // Possible Min/Max pattern
976 SDValue MinMax = LowerMinMax(Op, DAG);
977 if (MinMax.getNode()) {
978 return MinMax;
979 }
980
981 // If we make it this for it means we have no native instructions to handle
982 // this SELECT_CC, so we must lower it.
983 SDValue HWTrue, HWFalse;
984
985 if (CompareVT == MVT::f32) {
986 HWTrue = DAG.getConstantFP(1.0f, CompareVT);
987 HWFalse = DAG.getConstantFP(0.0f, CompareVT);
988 } else if (CompareVT == MVT::i32) {
989 HWTrue = DAG.getConstant(-1, CompareVT);
990 HWFalse = DAG.getConstant(0, CompareVT);
991 }
992 else {
993 assert(!"Unhandled value type in LowerSELECT_CC");
994 }
995
996 // Lower this unsupported SELECT_CC into a combination of two supported
997 // SELECT_CC operations.
998 SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
999
1000 return DAG.getNode(ISD::SELECT_CC, DL, VT,
1001 Cond, HWFalse,
1002 True, False,
1003 DAG.getCondCode(ISD::SETNE));
1004}
1005
/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
/// convert these pointers to a register index. Each register holds
/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
/// for indirect addressing.
SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
                                               unsigned StackWidth,
                                               SelectionDAG &DAG) const {
  // A "row" covers 4 * StackWidth bytes, so the byte address becomes a
  // register index via a right shift of log2(4 * StackWidth).
  unsigned SRLPad;
  switch(StackWidth) {
  case 1:
    SRLPad = 2;   // 4-byte rows  -> divide by 4
    break;
  case 2:
    SRLPad = 3;   // 8-byte rows  -> divide by 8
    break;
  case 4:
    SRLPad = 4;   // 16-byte rows -> divide by 16
    break;
  default: llvm_unreachable("Invalid stack width");
  }

  return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
                     DAG.getConstant(SRLPad, MVT::i32));
}
1031
1032void R600TargetLowering::getStackAddress(unsigned StackWidth,
1033 unsigned ElemIdx,
1034 unsigned &Channel,
1035 unsigned &PtrIncr) const {
1036 switch (StackWidth) {
1037 default:
1038 case 1:
1039 Channel = 0;
1040 if (ElemIdx > 0) {
1041 PtrIncr = 1;
1042 } else {
1043 PtrIncr = 0;
1044 }
1045 break;
1046 case 2:
1047 Channel = ElemIdx % 2;
1048 if (ElemIdx == 2) {
1049 PtrIncr = 1;
1050 } else {
1051 PtrIncr = 0;
1052 }
1053 break;
1054 case 4:
1055 Channel = ElemIdx;
1056 PtrIncr = 0;
1057 break;
1058 }
1059}
1060
Tom Stellard75aadc22012-12-11 21:25:42 +00001061SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001062 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001063 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1064 SDValue Chain = Op.getOperand(0);
1065 SDValue Value = Op.getOperand(1);
1066 SDValue Ptr = Op.getOperand(2);
1067
Tom Stellard2ffc3302013-08-26 15:05:44 +00001068 SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
Tom Stellardfbab8272013-08-16 01:12:11 +00001069 if (Result.getNode()) {
1070 return Result;
1071 }
1072
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001073 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
1074 if (StoreNode->isTruncatingStore()) {
1075 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001076 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001077 EVT MemVT = StoreNode->getMemoryVT();
1078 SDValue MaskConstant;
1079 if (MemVT == MVT::i8) {
1080 MaskConstant = DAG.getConstant(0xFF, MVT::i32);
1081 } else {
1082 assert(MemVT == MVT::i16);
1083 MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
1084 }
1085 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
1086 DAG.getConstant(2, MVT::i32));
1087 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
1088 DAG.getConstant(0x00000003, VT));
1089 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1090 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
1091 DAG.getConstant(3, VT));
1092 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1093 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
1094 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1095 // vector instead.
1096 SDValue Src[4] = {
1097 ShiftedValue,
1098 DAG.getConstant(0, MVT::i32),
1099 DAG.getConstant(0, MVT::i32),
1100 Mask
1101 };
1102 SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src, 4);
1103 SDValue Args[3] = { Chain, Input, DWordAddr };
1104 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
1105 Op->getVTList(), Args, 3, MemVT,
1106 StoreNode->getMemOperand());
1107 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
1108 Value.getValueType().bitsGE(MVT::i32)) {
1109 // Convert pointer from byte address to dword address.
1110 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1111 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
1112 Ptr, DAG.getConstant(2, MVT::i32)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001113
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001114 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
1115 assert(!"Truncated and indexed stores not supported yet");
1116 } else {
1117 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1118 }
1119 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001120 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001121 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001122
1123 EVT ValueVT = Value.getValueType();
1124
1125 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1126 return SDValue();
1127 }
1128
1129 // Lowering for indirect addressing
1130
1131 const MachineFunction &MF = DAG.getMachineFunction();
1132 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1133 getTargetMachine().getFrameLowering());
1134 unsigned StackWidth = TFL->getStackWidth(MF);
1135
1136 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1137
1138 if (ValueVT.isVector()) {
1139 unsigned NumElemVT = ValueVT.getVectorNumElements();
1140 EVT ElemVT = ValueVT.getVectorElementType();
1141 SDValue Stores[4];
1142
1143 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1144 "vector width in load");
1145
1146 for (unsigned i = 0; i < NumElemVT; ++i) {
1147 unsigned Channel, PtrIncr;
1148 getStackAddress(StackWidth, i, Channel, PtrIncr);
1149 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1150 DAG.getConstant(PtrIncr, MVT::i32));
1151 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
1152 Value, DAG.getConstant(i, MVT::i32));
1153
1154 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1155 Chain, Elem, Ptr,
1156 DAG.getTargetConstant(Channel, MVT::i32));
1157 }
1158 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
1159 } else {
1160 if (ValueVT == MVT::i8) {
1161 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1162 }
1163 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
NAKAMURA Takumi18ca09c2013-05-22 06:37:25 +00001164 DAG.getTargetConstant(0, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001165 }
1166
1167 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001168}
1169
// Map a constant-buffer address space to the base index of its constant
// block: 512 + kc_bank * 4096, i.e. (512 + (kc_bank << 12)).
// Returns -1 for any address space that is not a constant buffer.
static int
ConstantAddressBlock(unsigned AddressSpace) {
  switch (AddressSpace) {
  case AMDGPUAS::CONSTANT_BUFFER_0:
    return 512;
  case AMDGPUAS::CONSTANT_BUFFER_1:
    return 512 + 4096;
  case AMDGPUAS::CONSTANT_BUFFER_2:
    return 512 + 4096 * 2;
  case AMDGPUAS::CONSTANT_BUFFER_3:
    return 512 + 4096 * 3;
  case AMDGPUAS::CONSTANT_BUFFER_4:
    return 512 + 4096 * 4;
  case AMDGPUAS::CONSTANT_BUFFER_5:
    return 512 + 4096 * 5;
  case AMDGPUAS::CONSTANT_BUFFER_6:
    return 512 + 4096 * 6;
  case AMDGPUAS::CONSTANT_BUFFER_7:
    return 512 + 4096 * 7;
  case AMDGPUAS::CONSTANT_BUFFER_8:
    return 512 + 4096 * 8;
  case AMDGPUAS::CONSTANT_BUFFER_9:
    return 512 + 4096 * 9;
  case AMDGPUAS::CONSTANT_BUFFER_10:
    return 512 + 4096 * 10;
  case AMDGPUAS::CONSTANT_BUFFER_11:
    return 512 + 4096 * 11;
  case AMDGPUAS::CONSTANT_BUFFER_12:
    return 512 + 4096 * 12;
  case AMDGPUAS::CONSTANT_BUFFER_13:
    return 512 + 4096 * 13;
  case AMDGPUAS::CONSTANT_BUFFER_14:
    return 512 + 4096 * 14;
  case AMDGPUAS::CONSTANT_BUFFER_15:
    return 512 + 4096 * 15;
  default:
    return -1;
  }
}
1210
/// Custom lowering for ISD::LOAD. Handles, in order: vector loads from local
/// memory (split into scalars), loads from constant buffers (lowered to
/// CONST_ADDRESS), sign-extending loads (expanded to zext + shl + sra), and
/// private-address loads (lowered to REGISTER_LOAD for indirect addressing).
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  // Vector loads from local memory are split into per-element loads.
  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      SplitVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 && LoadNode->getExtensionType() != ISD::SEXTLOAD) {
    SDValue Result;
    // A statically-known pointer lets us emit one CONST_ADDRESS per channel.
    if (isa<ConstantExpr>(LoadNode->getSrcValue()) ||
        isa<Constant>(LoadNode->getSrcValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT, Slots, NumElements);
    } else {
      // non constant ptr cant be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    // Scalar loads only need channel 0 of the v4i32 result.
    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
          DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    SDValue ShiftAmount =
          DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
    // Expand sext load as: extload, then shift left/shift right arithmetic to
    // replicate the sign bit.
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                  LoadNode->getPointerInfo(), MemVT,
                                  LoadNode->isVolatile(),
                                  LoadNode->isNonTemporal(),
                                  LoadNode->getAlignment());
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);

    SDValue MergedValues[2] = { Sra, Chain };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    // Load each element via REGISTER_LOAD, padding unused channels with UNDEF.
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, MVT::i32),
                             Op.getOperand(2));
    }
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2];
  Ops[0] = LoweredLoad;
  Ops[1] = Chain;

  return DAG.getMergeValues(Ops, 2, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001345
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      SDLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned ShaderType = MF.getInfo<R600MachineFunctionInfo>()->ShaderType;

  SmallVector<ISD::InputArg, 8> LocalIns;

  // Recover the pre-legalization argument types so we know the in-memory
  // size of each argument.
  getOriginalFunctionArgs(DAG, DAG.getMachineFunction().getFunction(), Ins,
                          LocalIns);

  AnalyzeFormalArguments(CCInfo, LocalIns);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    EVT VT = Ins[i].VT;
    EVT MemVT = LocalIns[i].VT;

    // Non-compute (graphics) shaders receive their inputs pre-loaded into
    // 128-bit registers rather than through the kernel argument buffer.
    if (ShaderType != ShaderType::COMPUTE) {
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                                   AMDGPUAS::CONSTANT_BUFFER_0);

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.
    SDValue Arg = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, Chain,
                           DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
                                 MachinePointerInfo(UndefValue::get(PtrTy)),
                                 MemVT, false, false, 4);
                                 // 4 is the preferred alignment for
                                 // the CONSTANT memory space.
    InVals.push_back(Arg);
  }
  return Chain;
}
1396
Matt Arsenault758659232013-05-18 00:21:46 +00001397EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Tom Stellard75aadc22012-12-11 21:25:42 +00001398 if (!VT.isVector()) return MVT::i32;
1399 return VT.changeVectorElementTypeToInteger();
1400}
1401
Benjamin Kramer193960c2013-06-11 13:32:25 +00001402static SDValue
1403CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
1404 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001405 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1406 assert(RemapSwizzle.empty());
1407 SDValue NewBldVec[4] = {
1408 VectorEntry.getOperand(0),
1409 VectorEntry.getOperand(1),
1410 VectorEntry.getOperand(2),
1411 VectorEntry.getOperand(3)
1412 };
1413
1414 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeunefa58a5f2013-10-13 17:56:10 +00001415 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1416 // We mask write here to teach later passes that the ith element of this
1417 // vector is undef. Thus we can use it to reduce 128 bits reg usage,
1418 // break false dependencies and additionnaly make assembly easier to read.
1419 RemapSwizzle[i] = 7; // SEL_MASK_WRITE
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001420 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1421 if (C->isZero()) {
1422 RemapSwizzle[i] = 4; // SEL_0
1423 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1424 } else if (C->isExactlyValue(1.0)) {
1425 RemapSwizzle[i] = 5; // SEL_1
1426 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1427 }
1428 }
1429
1430 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1431 continue;
1432 for (unsigned j = 0; j < i; j++) {
1433 if (NewBldVec[i] == NewBldVec[j]) {
1434 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1435 RemapSwizzle[i] = j;
1436 break;
1437 }
1438 }
1439 }
1440
1441 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
1442 VectorEntry.getValueType(), NewBldVec, 4);
1443}
1444
Benjamin Kramer193960c2013-06-11 13:32:25 +00001445static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1446 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001447 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1448 assert(RemapSwizzle.empty());
1449 SDValue NewBldVec[4] = {
1450 VectorEntry.getOperand(0),
1451 VectorEntry.getOperand(1),
1452 VectorEntry.getOperand(2),
1453 VectorEntry.getOperand(3)
1454 };
1455 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001456 for (unsigned i = 0; i < 4; i++)
1457 RemapSwizzle[i] = i;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001458
1459 for (unsigned i = 0; i < 4; i++) {
1460 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1461 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1462 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001463 if (i == Idx) {
1464 isUnmovable[Idx] = true;
1465 continue;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001466 }
Vincent Lejeune301beb82013-10-13 17:56:04 +00001467 if (isUnmovable[Idx])
1468 continue;
1469 // Swap i and Idx
1470 std::swap(NewBldVec[Idx], NewBldVec[i]);
1471 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1472 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001473 }
1474 }
1475
1476 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
1477 VectorEntry.getValueType(), NewBldVec, 4);
1478}
1479
1480
1481SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1482SDValue Swz[4], SelectionDAG &DAG) const {
1483 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1484 // Old -> New swizzle values
1485 DenseMap<unsigned, unsigned> SwizzleRemap;
1486
1487 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1488 for (unsigned i = 0; i < 4; i++) {
1489 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1490 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1491 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1492 }
1493
1494 SwizzleRemap.clear();
1495 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1496 for (unsigned i = 0; i < 4; i++) {
1497 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1498 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1499 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1500 }
1501
1502 return BuildVector;
1503}
1504
1505
Tom Stellard75aadc22012-12-11 21:25:42 +00001506//===----------------------------------------------------------------------===//
1507// Custom DAG Optimizations
1508//===----------------------------------------------------------------------===//
1509
1510SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1511 DAGCombinerInfo &DCI) const {
1512 SelectionDAG &DAG = DCI.DAG;
1513
1514 switch (N->getOpcode()) {
1515 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1516 case ISD::FP_ROUND: {
1517 SDValue Arg = N->getOperand(0);
1518 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001519 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001520 Arg.getOperand(0));
1521 }
1522 break;
1523 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001524
1525 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1526 // (i32 select_cc f32, f32, -1, 0 cc)
1527 //
1528 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1529 // this to one of the SET*_DX10 instructions.
1530 case ISD::FP_TO_SINT: {
1531 SDValue FNeg = N->getOperand(0);
1532 if (FNeg.getOpcode() != ISD::FNEG) {
1533 return SDValue();
1534 }
1535 SDValue SelectCC = FNeg.getOperand(0);
1536 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1537 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1538 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1539 !isHWTrueValue(SelectCC.getOperand(2)) ||
1540 !isHWFalseValue(SelectCC.getOperand(3))) {
1541 return SDValue();
1542 }
1543
Andrew Trickef9de2a2013-05-25 02:42:55 +00001544 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001545 SelectCC.getOperand(0), // LHS
1546 SelectCC.getOperand(1), // RHS
1547 DAG.getConstant(-1, MVT::i32), // True
1548 DAG.getConstant(0, MVT::i32), // Flase
1549 SelectCC.getOperand(4)); // CC
1550
1551 break;
1552 }
Quentin Colombete2e05482013-07-30 00:27:16 +00001553
NAKAMURA Takumi8a046432013-10-28 04:07:38 +00001554 // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1555 // => build_vector elt0, ... , NewEltIdx, ... , eltN
Quentin Colombete2e05482013-07-30 00:27:16 +00001556 case ISD::INSERT_VECTOR_ELT: {
1557 SDValue InVec = N->getOperand(0);
1558 SDValue InVal = N->getOperand(1);
1559 SDValue EltNo = N->getOperand(2);
1560 SDLoc dl(N);
1561
1562 // If the inserted element is an UNDEF, just use the input vector.
1563 if (InVal.getOpcode() == ISD::UNDEF)
1564 return InVec;
1565
1566 EVT VT = InVec.getValueType();
1567
1568 // If we can't generate a legal BUILD_VECTOR, exit
1569 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1570 return SDValue();
1571
1572 // Check that we know which element is being inserted
1573 if (!isa<ConstantSDNode>(EltNo))
1574 return SDValue();
1575 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1576
1577 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1578 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1579 // vector elements.
1580 SmallVector<SDValue, 8> Ops;
1581 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1582 Ops.append(InVec.getNode()->op_begin(),
1583 InVec.getNode()->op_end());
1584 } else if (InVec.getOpcode() == ISD::UNDEF) {
1585 unsigned NElts = VT.getVectorNumElements();
1586 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1587 } else {
1588 return SDValue();
1589 }
1590
1591 // Insert the element
1592 if (Elt < Ops.size()) {
1593 // All the operands of BUILD_VECTOR must have the same type;
1594 // we enforce that here.
1595 EVT OpVT = Ops[0].getValueType();
1596 if (InVal.getValueType() != OpVT)
1597 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1598 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
1599 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
1600 Ops[Elt] = InVal;
1601 }
1602
1603 // Return the new vector
1604 return DAG.getNode(ISD::BUILD_VECTOR, dl,
1605 VT, &Ops[0], Ops.size());
1606 }
1607
Tom Stellard365366f2013-01-23 02:09:06 +00001608 // Extract_vec (Build_vector) generated by custom lowering
1609 // also needs to be customly combined
1610 case ISD::EXTRACT_VECTOR_ELT: {
1611 SDValue Arg = N->getOperand(0);
1612 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1613 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1614 unsigned Element = Const->getZExtValue();
1615 return Arg->getOperand(Element);
1616 }
1617 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001618 if (Arg.getOpcode() == ISD::BITCAST &&
1619 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1620 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1621 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001622 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00001623 Arg->getOperand(0).getOperand(Element));
1624 }
1625 }
Tom Stellard365366f2013-01-23 02:09:06 +00001626 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001627
1628 case ISD::SELECT_CC: {
1629 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1630 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001631 //
1632 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1633 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001634 SDValue LHS = N->getOperand(0);
1635 if (LHS.getOpcode() != ISD::SELECT_CC) {
1636 return SDValue();
1637 }
1638
1639 SDValue RHS = N->getOperand(1);
1640 SDValue True = N->getOperand(2);
1641 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001642 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001643
1644 if (LHS.getOperand(2).getNode() != True.getNode() ||
1645 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00001646 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00001647 return SDValue();
1648 }
1649
Tom Stellard5e524892013-03-08 15:37:11 +00001650 switch (NCC) {
1651 default: return SDValue();
1652 case ISD::SETNE: return LHS;
1653 case ISD::SETEQ: {
1654 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1655 LHSCC = ISD::getSetCCInverse(LHSCC,
1656 LHS.getOperand(0).getValueType().isInteger());
Tom Stellardcd428182013-09-28 02:50:38 +00001657 if (DCI.isBeforeLegalizeOps() ||
1658 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1659 return DAG.getSelectCC(SDLoc(N),
1660 LHS.getOperand(0),
1661 LHS.getOperand(1),
1662 LHS.getOperand(2),
1663 LHS.getOperand(3),
1664 LHSCC);
1665 break;
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001666 }
Tom Stellard5e524892013-03-08 15:37:11 +00001667 }
Tom Stellardcd428182013-09-28 02:50:38 +00001668 return SDValue();
Tom Stellard5e524892013-03-08 15:37:11 +00001669 }
Tom Stellardfbab8272013-08-16 01:12:11 +00001670
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001671 case AMDGPUISD::EXPORT: {
1672 SDValue Arg = N->getOperand(1);
1673 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1674 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001675
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001676 SDValue NewArgs[8] = {
1677 N->getOperand(0), // Chain
1678 SDValue(),
1679 N->getOperand(2), // ArrayBase
1680 N->getOperand(3), // Type
1681 N->getOperand(4), // SWZ_X
1682 N->getOperand(5), // SWZ_Y
1683 N->getOperand(6), // SWZ_Z
1684 N->getOperand(7) // SWZ_W
1685 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00001686 SDLoc DL(N);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001687 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001688 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
Tom Stellarde06163a2013-02-07 14:02:35 +00001689 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001690 case AMDGPUISD::TEXTURE_FETCH: {
1691 SDValue Arg = N->getOperand(1);
1692 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1693 break;
1694
1695 SDValue NewArgs[19] = {
1696 N->getOperand(0),
1697 N->getOperand(1),
1698 N->getOperand(2),
1699 N->getOperand(3),
1700 N->getOperand(4),
1701 N->getOperand(5),
1702 N->getOperand(6),
1703 N->getOperand(7),
1704 N->getOperand(8),
1705 N->getOperand(9),
1706 N->getOperand(10),
1707 N->getOperand(11),
1708 N->getOperand(12),
1709 N->getOperand(13),
1710 N->getOperand(14),
1711 N->getOperand(15),
1712 N->getOperand(16),
1713 N->getOperand(17),
1714 N->getOperand(18),
1715 };
1716 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
1717 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
1718 NewArgs, 19);
1719 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001720 }
1721 return SDValue();
1722}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001723
1724static bool
1725FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001726 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001727 const R600InstrInfo *TII =
1728 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
1729 if (!Src.isMachineOpcode())
1730 return false;
1731 switch (Src.getMachineOpcode()) {
1732 case AMDGPU::FNEG_R600:
1733 if (!Neg.getNode())
1734 return false;
1735 Src = Src.getOperand(0);
1736 Neg = DAG.getTargetConstant(1, MVT::i32);
1737 return true;
1738 case AMDGPU::FABS_R600:
1739 if (!Abs.getNode())
1740 return false;
1741 Src = Src.getOperand(0);
1742 Abs = DAG.getTargetConstant(1, MVT::i32);
1743 return true;
1744 case AMDGPU::CONST_COPY: {
1745 unsigned Opcode = ParentNode->getMachineOpcode();
1746 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1747
1748 if (!Sel.getNode())
1749 return false;
1750
1751 SDValue CstOffset = Src.getOperand(0);
1752 if (ParentNode->getValueType(0).isVector())
1753 return false;
1754
1755 // Gather constants values
1756 int SrcIndices[] = {
1757 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
1758 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
1759 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
1760 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1761 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1762 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1763 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
1764 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
1765 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
1766 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
1767 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
1768 };
1769 std::vector<unsigned> Consts;
1770 for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
1771 int OtherSrcIdx = SrcIndices[i];
1772 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
1773 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
1774 continue;
1775 if (HasDst) {
1776 OtherSrcIdx--;
1777 OtherSelIdx--;
1778 }
1779 if (RegisterSDNode *Reg =
1780 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
1781 if (Reg->getReg() == AMDGPU::ALU_CONST) {
1782 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(
1783 ParentNode->getOperand(OtherSelIdx));
1784 Consts.push_back(Cst->getZExtValue());
1785 }
1786 }
1787 }
1788
1789 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
1790 Consts.push_back(Cst->getZExtValue());
1791 if (!TII->fitsConstReadLimitations(Consts)) {
1792 return false;
1793 }
1794
1795 Sel = CstOffset;
1796 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
1797 return true;
1798 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001799 case AMDGPU::MOV_IMM_I32:
1800 case AMDGPU::MOV_IMM_F32: {
1801 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
1802 uint64_t ImmValue = 0;
1803
1804
1805 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
1806 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
1807 float FloatValue = FPC->getValueAPF().convertToFloat();
1808 if (FloatValue == 0.0) {
1809 ImmReg = AMDGPU::ZERO;
1810 } else if (FloatValue == 0.5) {
1811 ImmReg = AMDGPU::HALF;
1812 } else if (FloatValue == 1.0) {
1813 ImmReg = AMDGPU::ONE;
1814 } else {
1815 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
1816 }
1817 } else {
1818 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
1819 uint64_t Value = C->getZExtValue();
1820 if (Value == 0) {
1821 ImmReg = AMDGPU::ZERO;
1822 } else if (Value == 1) {
1823 ImmReg = AMDGPU::ONE_INT;
1824 } else {
1825 ImmValue = Value;
1826 }
1827 }
1828
1829 // Check that we aren't already using an immediate.
1830 // XXX: It's possible for an instruction to have more than one
1831 // immediate operand, but this is not supported yet.
1832 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
1833 if (!Imm.getNode())
1834 return false;
1835 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
1836 assert(C);
1837 if (C->getZExtValue())
1838 return false;
1839 Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
1840 }
1841 Src = DAG.getRegister(ImmReg, MVT::i32);
1842 return true;
1843 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001844 default:
1845 return false;
1846 }
1847}
1848
1849
1850/// \brief Fold the instructions after selecting them
1851SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
1852 SelectionDAG &DAG) const {
1853 const R600InstrInfo *TII =
1854 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
1855 if (!Node->isMachineOpcode())
1856 return Node;
1857 unsigned Opcode = Node->getMachineOpcode();
1858 SDValue FakeOp;
1859
1860 std::vector<SDValue> Ops;
1861 for(SDNode::op_iterator I = Node->op_begin(), E = Node->op_end();
1862 I != E; ++I)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00001863 Ops.push_back(*I);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001864
1865 if (Opcode == AMDGPU::DOT_4) {
1866 int OperandIdx[] = {
1867 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1868 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1869 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1870 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
1871 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
1872 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
1873 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
1874 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00001875 };
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001876 int NegIdx[] = {
1877 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
1878 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
1879 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
1880 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
1881 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
1882 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
1883 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
1884 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
1885 };
1886 int AbsIdx[] = {
1887 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
1888 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
1889 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
1890 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
1891 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
1892 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
1893 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
1894 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
1895 };
1896 for (unsigned i = 0; i < 8; i++) {
1897 if (OperandIdx[i] < 0)
1898 return Node;
1899 SDValue &Src = Ops[OperandIdx[i] - 1];
1900 SDValue &Neg = Ops[NegIdx[i] - 1];
1901 SDValue &Abs = Ops[AbsIdx[i] - 1];
1902 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1903 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
1904 if (HasDst)
1905 SelIdx--;
1906 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001907 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
1908 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1909 }
1910 } else if (Opcode == AMDGPU::REG_SEQUENCE) {
1911 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
1912 SDValue &Src = Ops[i];
1913 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001914 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1915 }
Vincent Lejeune0167a312013-09-12 23:45:00 +00001916 } else if (Opcode == AMDGPU::CLAMP_R600) {
1917 SDValue Src = Node->getOperand(0);
1918 if (!Src.isMachineOpcode() ||
1919 !TII->hasInstrModifiers(Src.getMachineOpcode()))
1920 return Node;
1921 int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
1922 AMDGPU::OpName::clamp);
1923 if (ClampIdx < 0)
1924 return Node;
1925 std::vector<SDValue> Ops;
1926 unsigned NumOp = Src.getNumOperands();
1927 for(unsigned i = 0; i < NumOp; ++i)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00001928 Ops.push_back(Src.getOperand(i));
Vincent Lejeune0167a312013-09-12 23:45:00 +00001929 Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
1930 return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
1931 Node->getVTList(), Ops);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001932 } else {
1933 if (!TII->hasInstrModifiers(Opcode))
1934 return Node;
1935 int OperandIdx[] = {
1936 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
1937 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
1938 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
1939 };
1940 int NegIdx[] = {
1941 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
1942 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
1943 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
1944 };
1945 int AbsIdx[] = {
1946 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
1947 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
1948 -1
1949 };
1950 for (unsigned i = 0; i < 3; i++) {
1951 if (OperandIdx[i] < 0)
1952 return Node;
1953 SDValue &Src = Ops[OperandIdx[i] - 1];
1954 SDValue &Neg = Ops[NegIdx[i] - 1];
1955 SDValue FakeAbs;
1956 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
1957 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1958 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001959 int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
1960 if (HasDst) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001961 SelIdx--;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001962 ImmIdx--;
1963 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001964 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001965 SDValue &Imm = Ops[ImmIdx];
1966 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001967 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1968 }
1969 }
1970
1971 return Node;
1972}