blob: ac4a81c9ac576e3db79b87896795533ad5494a46 [file] [log] [blame]
//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//
15#include "R600ISelLowering.h"
16#include "R600Defines.h"
17#include "R600InstrInfo.h"
18#include "R600MachineFunctionInfo.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000019#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000020#include "llvm/CodeGen/MachineInstrBuilder.h"
21#include "llvm/CodeGen/MachineRegisterInfo.h"
22#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000023#include "llvm/IR/Argument.h"
24#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000025
26using namespace llvm;
27
28R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000029 AMDGPUTargetLowering(TM),
30 Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000031 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
32 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
33 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
34 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
35 computeRegisterProperties();
36
37 setOperationAction(ISD::FADD, MVT::v4f32, Expand);
38 setOperationAction(ISD::FMUL, MVT::v4f32, Expand);
39 setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
40 setOperationAction(ISD::FSUB, MVT::v4f32, Expand);
41
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000042 setOperationAction(ISD::FCOS, MVT::f32, Custom);
43 setOperationAction(ISD::FSIN, MVT::f32, Custom);
44
Tom Stellarda8b03512012-12-21 16:33:24 +000045 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
46 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
47 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
48 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000049 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
50
Tom Stellard492ebea2013-03-08 15:37:07 +000051 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
52 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000053
54 setOperationAction(ISD::FSUB, MVT::f32, Expand);
55
56 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
57 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
58 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000059
Tom Stellard75aadc22012-12-11 21:25:42 +000060 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
61 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
62
Tom Stellarde8f9f282013-03-08 15:37:05 +000063 setOperationAction(ISD::SETCC, MVT::i32, Expand);
64 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000065 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
66
67 setOperationAction(ISD::SELECT, MVT::i32, Custom);
68 setOperationAction(ISD::SELECT, MVT::f32, Custom);
69
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000070 // Legalize loads and stores to the private address space.
71 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard6ec9e802013-06-20 21:55:23 +000072 setOperationAction(ISD::LOAD, MVT::v2i32, Expand);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000073 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellard33dd04b2013-07-23 01:47:52 +000074 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
75 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand);
76 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Expand);
77 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000078 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000079 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard6ec9e802013-06-20 21:55:23 +000080 setOperationAction(ISD::STORE, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000081 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
82
Tom Stellard365366f2013-01-23 02:09:06 +000083 setOperationAction(ISD::LOAD, MVT::i32, Custom);
84 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000085 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
86
Tom Stellard75aadc22012-12-11 21:25:42 +000087 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +000088 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +000089 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +000090 setTargetDAGCombine(ISD::SELECT_CC);
Tom Stellard75aadc22012-12-11 21:25:42 +000091
Michel Danzer49812b52013-07-10 16:37:07 +000092 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
93
Tom Stellardb852af52013-03-08 15:37:03 +000094 setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard87047f62013-04-24 23:56:18 +000095 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard75aadc22012-12-11 21:25:42 +000096 setSchedulingPreference(Sched::VLIW);
97}
98
/// \brief Expand pseudo instructions marked with usesCustomInserter.
///
/// Each pseudo is replaced with the real machine instruction(s) it stands
/// for and then erased.  The two early-return paths (a skipped export and
/// RETURN) keep the original instruction in place.
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());

  switch (MI->getOpcode()) {
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case AMDGPU::CLAMP_R600: {
    // Clamp is a plain MOV with the clamp output-modifier flag set.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                   AMDGPU::MOV,
                                                   MI->getOperand(0).getReg(),
                                                   MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    // |x| is a MOV with the source absolute-value modifier set.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    // -x is a MOV with the source negate modifier set.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    // Suppress the write of the instruction defining the masked register by
    // flagging that defining instruction, then drop the MASK_WRITE pseudo.
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::LDS_READ_RET: {
    // Re-emit the LDS read with its OQAP result register made explicit,
    // then copy the value out of OQAP into the virtual destination.
    MachineInstrBuilder NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                                        TII->get(MI->getOpcode()),
                                        AMDGPU::OQAP);
    for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
      NewMI.addOperand(MI->getOperand(i));
    }
    TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV,
                                 MI->getOperand(0).getReg(),
                                 AMDGPU::OQAP);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    // Materialize a float immediate (moved as its raw bit pattern).
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    // A constant-buffer read: MOV from ALU_CONST with the buffer selector
    // encoded in the src0_sel operand.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
        MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    // If the write is immediately followed by RETURN, fold the
    // end-of-program bit into it.
    unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    // Gradient texture sample: stash the H/V gradients into the two
    // gradient slots (T0/T1), then issue the sample with those implicit.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    // Adjust the coordinate swizzle and the coordinate-type (normalized)
    // bits per texture target.  These target ids match the intrinsic ABI
    // used by the frontend.
    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    // Same as TXD but samples with the shadow-compare variant.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    // Evaluate the f32 condition into PREDICATE_BIT (PRED_X pushes the
    // predicate stack), then branch on it.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    // Integer variant of the conditional branch above.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
            AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO_INT)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
           .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if its not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = llvm::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // CF instruction word: 84/40 are the export encodings for the EG and
    // R600 families respectively -- TODO confirm against the ISA docs.
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  MI->eraseFromParent();
  return BB;
}
469
470//===----------------------------------------------------------------------===//
471// Custom DAG Lowering Operations
472//===----------------------------------------------------------------------===//
473
/// \brief Custom-lower the operations registered as Custom in the
/// constructor.
///
/// Handles trig, selects, loads/stores, frame indices, global addresses and
/// the R600-specific intrinsics; anything else is delegated to
/// AMDGPUTargetLowering.  Returns SDValue() when no custom lowering applies.
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::FCOS:
  case ISD::FSIN: return LowerTrig(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::SELECT: return LowerSELECT(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: return LowerLOAD(Op, DAG);
  case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      // Record the output register as a live-out (the RETURN custom
      // inserter adds these as implicit uses) and copy the value into it.
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MFI->LiveOuts.push_back(Reg);
      return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
    }
    case AMDGPUIntrinsic::R600_store_swizzle: {
      // Build an EXPORT node with the identity swizzle X,Y,Z,W.
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, MVT::i32), // SWZ_X
        DAG.getConstant(1, MVT::i32), // SWZ_Y
        DAG.getConstant(2, MVT::i32), // SWZ_Z
        DAG.getConstant(3, MVT::i32) // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(),
                         Args, 8);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch(IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      // Shader inputs are preloaded into TReg32 registers; read one back
      // via a copy from the corresponding live-in register.
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      MRI.addLiveIn(Reg);
      return DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), Reg, VT);
    }

    case AMDGPUIntrinsic::R600_interp_input: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
      MachineSDNode *interp;
      if (ijb < 0) {
        // Negative i/j base: load the whole attribute vector and extract
        // the requested channel as a subregister.
        const MachineFunction &MF = DAG.getMachineFunction();
        const R600InstrInfo *TII =
          static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
        interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
            MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
        return DAG.getTargetExtractSubreg(
            TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
            DL, MVT::f32, SDValue(interp, 0));
      }

      // The barycentric i/j values live in a consecutive pair of TReg32
      // registers selected by ijb.
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
      unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
      MRI.addLiveIn(RegisterI);
      MRI.addLiveIn(RegisterJ);
      SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
      SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);

      // Channels 0/1 come from the XY interp pair, channels 2/3 from ZW;
      // the parity of the slot picks which half of the pair to return.
      if (slot % 4 < 2)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            RegisterJNode, RegisterINode);
      return SDValue(interp, slot % 2);
    }
    case AMDGPUIntrinsic::R600_tex:
    case AMDGPUIntrinsic::R600_texc:
    case AMDGPUIntrinsic::R600_txl:
    case AMDGPUIntrinsic::R600_txlc:
    case AMDGPUIntrinsic::R600_txb:
    case AMDGPUIntrinsic::R600_txbc:
    case AMDGPUIntrinsic::R600_txf:
    case AMDGPUIntrinsic::R600_txq:
    case AMDGPUIntrinsic::R600_ddx:
    case AMDGPUIntrinsic::R600_ddy: {
      // Map the texture intrinsic onto a TEXTURE_FETCH node; TextureOp
      // selects the fetch variant.
      unsigned TextureOp;
      switch (IntrinsicID) {
      case AMDGPUIntrinsic::R600_tex:
        TextureOp = 0;
        break;
      case AMDGPUIntrinsic::R600_texc:
        TextureOp = 1;
        break;
      case AMDGPUIntrinsic::R600_txl:
        TextureOp = 2;
        break;
      case AMDGPUIntrinsic::R600_txlc:
        TextureOp = 3;
        break;
      case AMDGPUIntrinsic::R600_txb:
        TextureOp = 4;
        break;
      case AMDGPUIntrinsic::R600_txbc:
        TextureOp = 5;
        break;
      case AMDGPUIntrinsic::R600_txf:
        TextureOp = 6;
        break;
      case AMDGPUIntrinsic::R600_txq:
        TextureOp = 7;
        break;
      case AMDGPUIntrinsic::R600_ddx:
        TextureOp = 8;
        break;
      case AMDGPUIntrinsic::R600_ddy:
        TextureOp = 9;
        break;
      default:
        llvm_unreachable("Unknow Texture Operation");
      }

      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
    }
    case AMDGPUIntrinsic::AMDGPU_dp4: {
      // Dot product: interleave the lanes of the two operands as the eight
      // sources of a DOT4 node.
      SDValue Args[8] = {
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(0, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(0, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(1, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(1, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(2, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(2, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(3, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(3, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8);
    }

    // Implicit kernel parameters are stored at fixed dword offsets in the
    // implicit-parameter buffer (see LowerImplicitParameter).
    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    // Workgroup ids are preloaded in T1, workitem ids in T0.
    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}
707
/// \brief Provide custom legalized results for illegal-typed nodes.
///
/// Results must be pushed in the same order as the node's result values.
void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default: return;
  case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
    return;
  case ISD::LOAD: {
    SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
    // Push both the loaded value and the output chain.
    Results.push_back(SDValue(Node, 0));
    Results.push_back(SDValue(Node, 1));
    // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
    // function
    DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
    return;
  }
  case ISD::STORE:
    SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
    Results.push_back(SDValue(Node, 0));
    return;
  }
}
730
/// \brief Lower FSIN/FCOS with the argument range reduction the hardware
/// trig instructions require.
SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
  // On hw >= R700, COS/SIN input must be between -1. and 1.
  // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
  EVT VT = Op.getValueType();
  SDValue Arg = Op.getOperand(0);
  // 0.15915494309 ~= 1/(2*pi): convert radians to turns, bias by 0.5 so
  // FRACT lands in [0, 1); the bias is removed again below.
  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
      DAG.getNode(ISD::FADD, SDLoc(Op), VT,
        DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
          DAG.getConstantFP(0.15915494309, MVT::f32)),
        DAG.getConstantFP(0.5, MVT::f32)));
  unsigned TrigNode;
  switch (Op.getOpcode()) {
  case ISD::FCOS:
    TrigNode = AMDGPUISD::COS_HW;
    break;
  case ISD::FSIN:
    TrigNode = AMDGPUISD::SIN_HW;
    break;
  default:
    llvm_unreachable("Wrong trig opcode");
  }
  SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
      DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
        DAG.getConstantFP(-0.5, MVT::f32)));
  if (Gen >= AMDGPUSubtarget::R700)
    return TrigVal;
  // On R600 hw, COS/SIN input must be between -Pi and Pi.
  // NOTE(review): this FMUL scales the trig *result* by Pi rather than the
  // input operand fed to TrigNode, which looks inconsistent with the
  // comment above -- confirm against the R600 ISA before changing.
  return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
      DAG.getConstantFP(3.14159265359, MVT::f32));
}
761
Tom Stellard75aadc22012-12-11 21:25:42 +0000762SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
763 return DAG.getNode(
764 ISD::SETCC,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000765 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +0000766 MVT::i1,
767 Op, DAG.getConstantFP(0.0f, MVT::f32),
768 DAG.getCondCode(ISD::SETNE)
769 );
770}
771
Tom Stellard75aadc22012-12-11 21:25:42 +0000772SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000773 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +0000774 unsigned DwordOffset) const {
775 unsigned ByteOffset = DwordOffset * 4;
776 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
777 AMDGPUAS::PARAM_I_ADDRESS);
778
779 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
780 assert(isInt<16>(ByteOffset));
781
782 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
783 DAG.getConstant(ByteOffset, MVT::i32), // PTR
784 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
785 false, false, false, 0);
786}
787
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000788SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
789
790 MachineFunction &MF = DAG.getMachineFunction();
791 const AMDGPUFrameLowering *TFL =
792 static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
793
794 FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
795 assert(FIN);
796
797 unsigned FrameIndex = FIN->getIndex();
798 unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
799 return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
800}
801
Tom Stellard75aadc22012-12-11 21:25:42 +0000802bool R600TargetLowering::isZero(SDValue Op) const {
803 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
804 return Cst->isNullValue();
805 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
806 return CstFP->isZero();
807 } else {
808 return false;
809 }
810}
811
/// Lower SELECT_CC either to a single natively-matchable SELECT_CC (SET* or
/// CND* instruction patterns) or, failing that, to a pair of SELECT_CC nodes.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_any
  // select_cc f32, f32, 1.0f, 0.0f, cc_any
  // select_cc i32, i32, -1, 0, cc_any
  //

  // Move hardware True/False values to the correct operand.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    std::swap(False, True);
    // Inverting the condition compensates for the True/False swap; the second
    // argument selects integer vs floating-point inversion rules.
    CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_any
  // select_cc f32, 0.0, i32, i32, cc_any
  // select_cc i32, 0, f32, f32, cc_any
  // select_cc i32, 0, i32, i32, cc_any
  //
  if (isZero(LHS) || isZero(RHS)) {
    // Normalize so that Cond is the non-zero operand and Zero the constant.
    SDValue Cond = (isZero(LHS) ? RHS : LHS);
    SDValue Zero = (isZero(LHS) ? LHS : RHS);
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types.  This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }
    if (isZero(LHS)) {
      // The zero was on the left, so the comparison operands were swapped
      // above; swap the condition code to match.
      CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
    }

    // CND* only tests >=, >, or == against zero; rewrite "not-equal" and
    // "less-than" style conditions as their inverse and swap True/False.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
    case ISD::SETULE:
    case ISD::SETULT:
    case ISD::SETOLE:
    case ISD::SETOLT:
    case ISD::SETLE:
    case ISD::SETLT:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    // Bitcast back to the requested result type (nop when CompareVT == VT).
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }


  // Possible Min/Max pattern
  SDValue MinMax = LowerMinMax(Op, DAG);
  if (MinMax.getNode()) {
    return MinMax;
  }

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  }
  else {
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
931
932SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
933 return DAG.getNode(ISD::SELECT_CC,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000934 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +0000935 Op.getValueType(),
936 Op.getOperand(0),
937 DAG.getConstant(0, MVT::i32),
938 Op.getOperand(1),
939 Op.getOperand(2),
940 DAG.getCondCode(ISD::SETNE));
941}
942
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000943/// LLVM generates byte-addresed pointers. For indirect addressing, we need to
944/// convert these pointers to a register index. Each register holds
945/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
946/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
947/// for indirect addressing.
948SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
949 unsigned StackWidth,
950 SelectionDAG &DAG) const {
951 unsigned SRLPad;
952 switch(StackWidth) {
953 case 1:
954 SRLPad = 2;
955 break;
956 case 2:
957 SRLPad = 3;
958 break;
959 case 4:
960 SRLPad = 4;
961 break;
962 default: llvm_unreachable("Invalid stack width");
963 }
964
Andrew Trickef9de2a2013-05-25 02:42:55 +0000965 return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000966 DAG.getConstant(SRLPad, MVT::i32));
967}
968
969void R600TargetLowering::getStackAddress(unsigned StackWidth,
970 unsigned ElemIdx,
971 unsigned &Channel,
972 unsigned &PtrIncr) const {
973 switch (StackWidth) {
974 default:
975 case 1:
976 Channel = 0;
977 if (ElemIdx > 0) {
978 PtrIncr = 1;
979 } else {
980 PtrIncr = 0;
981 }
982 break;
983 case 2:
984 Channel = ElemIdx % 2;
985 if (ElemIdx == 2) {
986 PtrIncr = 1;
987 } else {
988 PtrIncr = 0;
989 }
990 break;
991 case 4:
992 Channel = ElemIdx;
993 PtrIncr = 0;
994 break;
995 }
996}
997
/// Custom lowering for STORE: global-address stores have their pointer
/// converted from a byte to a dword address; private-address stores are
/// lowered to AMDGPUISD::REGISTER_STORE nodes using indirect register
/// indexing.  All other address spaces return SDValue() (no custom lowering).
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Ptr = Op.getOperand(2);

  if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
      Ptr->getOpcode() != AMDGPUISD::DWORDADDR) {
    // Convert pointer from byte address to dword address.
    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
                      DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
                                  Ptr, DAG.getConstant(2, MVT::i32)));

    if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
      assert(!"Truncated and indexed stores not supported yet");
    } else {
      // Re-emit the store with the rewritten (dword) pointer.
      Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
    }
    return Chain;
  }

  EVT ValueVT = Value.getValueType();

  if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing

  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  // Turn the byte pointer into a register index (see stackPtrToRegIndex).
  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (ValueVT.isVector()) {
    // Scalarize: one REGISTER_STORE per vector element, all chained together
    // with a TokenFactor.
    unsigned NumElemVT = ValueVT.getVectorNumElements();
    EVT ElemVT = ValueVT.getVectorElementType();
    SDValue Stores[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      // Ptr accumulates the per-element increments reported by
      // getStackAddress.
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
                                 Value, DAG.getConstant(i, MVT::i32));

      Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
                              Chain, Elem, Ptr,
                              DAG.getTargetConstant(Channel, MVT::i32));
    }
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
  } else {
    // i8 values are widened to i32 before being stored.
    if (ValueVT == MVT::i8) {
      Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
    }
    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
                        DAG.getTargetConstant(0, MVT::i32)); // Channel
  }

  return Chain;
}
1066
Tom Stellard365366f2013-01-23 02:09:06 +00001067// return (512 + (kc_bank << 12)
1068static int
1069ConstantAddressBlock(unsigned AddressSpace) {
1070 switch (AddressSpace) {
1071 case AMDGPUAS::CONSTANT_BUFFER_0:
1072 return 512;
1073 case AMDGPUAS::CONSTANT_BUFFER_1:
1074 return 512 + 4096;
1075 case AMDGPUAS::CONSTANT_BUFFER_2:
1076 return 512 + 4096 * 2;
1077 case AMDGPUAS::CONSTANT_BUFFER_3:
1078 return 512 + 4096 * 3;
1079 case AMDGPUAS::CONSTANT_BUFFER_4:
1080 return 512 + 4096 * 4;
1081 case AMDGPUAS::CONSTANT_BUFFER_5:
1082 return 512 + 4096 * 5;
1083 case AMDGPUAS::CONSTANT_BUFFER_6:
1084 return 512 + 4096 * 6;
1085 case AMDGPUAS::CONSTANT_BUFFER_7:
1086 return 512 + 4096 * 7;
1087 case AMDGPUAS::CONSTANT_BUFFER_8:
1088 return 512 + 4096 * 8;
1089 case AMDGPUAS::CONSTANT_BUFFER_9:
1090 return 512 + 4096 * 9;
1091 case AMDGPUAS::CONSTANT_BUFFER_10:
1092 return 512 + 4096 * 10;
1093 case AMDGPUAS::CONSTANT_BUFFER_11:
1094 return 512 + 4096 * 11;
1095 case AMDGPUAS::CONSTANT_BUFFER_12:
1096 return 512 + 4096 * 12;
1097 case AMDGPUAS::CONSTANT_BUFFER_13:
1098 return 512 + 4096 * 13;
1099 case AMDGPUAS::CONSTANT_BUFFER_14:
1100 return 512 + 4096 * 14;
1101 case AMDGPUAS::CONSTANT_BUFFER_15:
1102 return 512 + 4096 * 15;
1103 default:
1104 return -1;
1105 }
1106}
1107
1108SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
1109{
1110 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001111 SDLoc DL(Op);
Tom Stellard365366f2013-01-23 02:09:06 +00001112 LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
1113 SDValue Chain = Op.getOperand(0);
1114 SDValue Ptr = Op.getOperand(1);
1115 SDValue LoweredLoad;
1116
1117 int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
1118 if (ConstantBlock > -1) {
1119 SDValue Result;
1120 if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
Vincent Lejeune743dca02013-03-05 15:04:29 +00001121 dyn_cast<Constant>(LoadNode->getSrcValue()) ||
1122 dyn_cast<ConstantSDNode>(Ptr)) {
Tom Stellard365366f2013-01-23 02:09:06 +00001123 SDValue Slots[4];
1124 for (unsigned i = 0; i < 4; i++) {
1125 // We want Const position encoded with the following formula :
1126 // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
1127 // const_index is Ptr computed by llvm using an alignment of 16.
1128 // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
1129 // then div by 4 at the ISel step
1130 SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
1131 DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
1132 Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
1133 }
1134 Result = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Slots, 4);
1135 } else {
1136 // non constant ptr cant be folded, keeps it as a v4f32 load
1137 Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
Vincent Lejeune743dca02013-03-05 15:04:29 +00001138 DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
Christian Konig189357c2013-03-07 09:03:59 +00001139 DAG.getConstant(LoadNode->getAddressSpace() -
NAKAMURA Takumi18ca09c2013-05-22 06:37:25 +00001140 AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
Tom Stellard365366f2013-01-23 02:09:06 +00001141 );
1142 }
1143
1144 if (!VT.isVector()) {
1145 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
1146 DAG.getConstant(0, MVT::i32));
1147 }
1148
1149 SDValue MergedValues[2] = {
1150 Result,
1151 Chain
1152 };
1153 return DAG.getMergeValues(MergedValues, 2, DL);
1154 }
1155
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001156 if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1157 return SDValue();
1158 }
1159
1160 // Lowering for indirect addressing
1161 const MachineFunction &MF = DAG.getMachineFunction();
1162 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1163 getTargetMachine().getFrameLowering());
1164 unsigned StackWidth = TFL->getStackWidth(MF);
1165
1166 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1167
1168 if (VT.isVector()) {
1169 unsigned NumElemVT = VT.getVectorNumElements();
1170 EVT ElemVT = VT.getVectorElementType();
1171 SDValue Loads[4];
1172
1173 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1174 "vector width in load");
1175
1176 for (unsigned i = 0; i < NumElemVT; ++i) {
1177 unsigned Channel, PtrIncr;
1178 getStackAddress(StackWidth, i, Channel, PtrIncr);
1179 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1180 DAG.getConstant(PtrIncr, MVT::i32));
1181 Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
1182 Chain, Ptr,
1183 DAG.getTargetConstant(Channel, MVT::i32),
1184 Op.getOperand(2));
1185 }
1186 for (unsigned i = NumElemVT; i < 4; ++i) {
1187 Loads[i] = DAG.getUNDEF(ElemVT);
1188 }
1189 EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
1190 LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
1191 } else {
1192 LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
1193 Chain, Ptr,
1194 DAG.getTargetConstant(0, MVT::i32), // Channel
1195 Op.getOperand(2));
1196 }
1197
1198 SDValue Ops[2];
1199 Ops[0] = LoweredLoad;
1200 Ops[1] = Chain;
1201
1202 return DAG.getMergeValues(Ops, 2, DL);
Tom Stellard365366f2013-01-23 02:09:06 +00001203}
Tom Stellard75aadc22012-12-11 21:25:42 +00001204
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
///
/// Each formal argument is materialized as an extending load from the
/// PARAM_I address space; offsets are accumulated as arguments are visited.
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      SDLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  // Arguments start at byte offset 36 of the parameter space (presumably the
  // first 36 bytes are reserved for implicit parameters -- confirm against
  // the runtime ABI).
  unsigned ParamOffsetBytes = 36;
  Function::const_arg_iterator FuncArg =
                            DAG.getMachineFunction().getFunction()->arg_begin();
  for (unsigned i = 0, e = Ins.size(); i < e; ++i, ++FuncArg) {
    EVT VT = Ins[i].VT;
    Type *ArgType = FuncArg->getType();
    // Pointers are treated as 32 bits wide for parameter layout purposes.
    unsigned ArgSizeInBits = ArgType->isPointerTy() ?
                             32 : ArgType->getPrimitiveSizeInBits();
    unsigned ArgBytes = ArgSizeInBits >> 3;
    EVT ArgVT;
    if (ArgSizeInBits < VT.getSizeInBits()) {
      // The in-memory argument is narrower than the register type; load it
      // with its true (integer) width and let the extending load widen it.
      assert(!ArgType->isFloatTy() &&
             "Extending floating point arguments not supported yet");
      ArgVT = MVT::getIntegerVT(ArgSizeInBits);
    } else {
      ArgVT = VT;
    }

    // Honor the zext/sext attributes on the argument; default to any-extend.
    ISD::LoadExtType LoadType = ISD::EXTLOAD;
    if (Ins[i].Flags.isZExt()) {
      LoadType = ISD::ZEXTLOAD;
    } else if (Ins[i].Flags.isSExt()) {
      LoadType = ISD::SEXTLOAD;
    }

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::PARAM_I_ADDRESS);
    SDValue Arg = DAG.getExtLoad(LoadType, DL, VT, DAG.getRoot(),
                                 DAG.getConstant(ParamOffsetBytes, MVT::i32),
                                 MachinePointerInfo(UndefValue::get(PtrTy)),
                                 ArgVT, false, false, ArgBytes);
    InVals.push_back(Arg);
    // Arguments are packed back to back with no implicit padding here.
    ParamOffsetBytes += ArgBytes;
  }
  return Chain;
}
1251
Matt Arsenault758659232013-05-18 00:21:46 +00001252EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Tom Stellard75aadc22012-12-11 21:25:42 +00001253 if (!VT.isVector()) return MVT::i32;
1254 return VT.changeVectorElementTypeToInteger();
1255}
1256
/// Fold constant and duplicate operands out of a 4-element BUILD_VECTOR so
/// they can be expressed through swizzle selectors instead of real operands.
/// \p RemapSwizzle receives an old-index -> new-selector mapping, where
/// selector 4 (SEL_0) encodes constant 0.0 and selector 5 (SEL_1) encodes
/// constant 1.0; entries remapped to an earlier index mark duplicates.
static SDValue
CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
                        DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };

  for (unsigned i = 0; i < 4; i++) {
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        // Constant 0.0 is expressed by the SEL_0 selector; the operand slot
        // becomes free (undef).
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        // Constant 1.0 is expressed by the SEL_1 selector.
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    // Undef slots (including the ones just freed above) take no part in
    // deduplication.
    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      continue;
    // Deduplicate: if this operand already appears at an earlier position,
    // point the swizzle at the earlier copy and free this slot.
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec, 4);
}
1294
/// Reorder the operands of a 4-element BUILD_VECTOR so that any operand that
/// is an EXTRACT_VECTOR_ELT ends up in the position matching its extract
/// index, recording the resulting permutation in \p RemapSwizzle
/// (old index -> new index).
static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
                                DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };
  // Once a slot has received the element it extracts, it must not be swapped
  // away again.
  bool isUnmovable[4] = { false, false, false, false };
  // Start from the identity permutation.
  for (unsigned i = 0; i < 4; i++)
    RemapSwizzle[i] = i;

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      // NOTE(review): the dyn_cast<> result is dereferenced without a null
      // check, so a non-constant extract index would crash; presumably the
      // callers only ever produce constant indices -- confirm.
      unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
          ->getZExtValue();
      if (!isUnmovable[Idx]) {
        // Swap i and Idx
        std::swap(NewBldVec[Idx], NewBldVec[i]);
        // The map is indexed through itself, apparently to account for
        // positions already renamed by earlier swaps -- confirm when
        // touching this code.
        std::swap(RemapSwizzle[RemapSwizzle[Idx]], RemapSwizzle[RemapSwizzle[i]]);
      }
      isUnmovable[Idx] = true;
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec, 4);
}
1325
1326
1327SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1328SDValue Swz[4], SelectionDAG &DAG) const {
1329 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1330 // Old -> New swizzle values
1331 DenseMap<unsigned, unsigned> SwizzleRemap;
1332
1333 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1334 for (unsigned i = 0; i < 4; i++) {
1335 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1336 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1337 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1338 }
1339
1340 SwizzleRemap.clear();
1341 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1342 for (unsigned i = 0; i < 4; i++) {
1343 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1344 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1345 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1346 }
1347
1348 return BuildVector;
1349}
1350
1351
Tom Stellard75aadc22012-12-11 21:25:42 +00001352//===----------------------------------------------------------------------===//
1353// Custom DAG Optimizations
1354//===----------------------------------------------------------------------===//
1355
1356SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1357 DAGCombinerInfo &DCI) const {
1358 SelectionDAG &DAG = DCI.DAG;
1359
1360 switch (N->getOpcode()) {
1361 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1362 case ISD::FP_ROUND: {
1363 SDValue Arg = N->getOperand(0);
1364 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001365 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001366 Arg.getOperand(0));
1367 }
1368 break;
1369 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001370
1371 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1372 // (i32 select_cc f32, f32, -1, 0 cc)
1373 //
1374 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1375 // this to one of the SET*_DX10 instructions.
1376 case ISD::FP_TO_SINT: {
1377 SDValue FNeg = N->getOperand(0);
1378 if (FNeg.getOpcode() != ISD::FNEG) {
1379 return SDValue();
1380 }
1381 SDValue SelectCC = FNeg.getOperand(0);
1382 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1383 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1384 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1385 !isHWTrueValue(SelectCC.getOperand(2)) ||
1386 !isHWFalseValue(SelectCC.getOperand(3))) {
1387 return SDValue();
1388 }
1389
Andrew Trickef9de2a2013-05-25 02:42:55 +00001390 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001391 SelectCC.getOperand(0), // LHS
1392 SelectCC.getOperand(1), // RHS
1393 DAG.getConstant(-1, MVT::i32), // True
1394 DAG.getConstant(0, MVT::i32), // Flase
1395 SelectCC.getOperand(4)); // CC
1396
1397 break;
1398 }
Tom Stellard365366f2013-01-23 02:09:06 +00001399 // Extract_vec (Build_vector) generated by custom lowering
1400 // also needs to be customly combined
1401 case ISD::EXTRACT_VECTOR_ELT: {
1402 SDValue Arg = N->getOperand(0);
1403 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1404 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1405 unsigned Element = Const->getZExtValue();
1406 return Arg->getOperand(Element);
1407 }
1408 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001409 if (Arg.getOpcode() == ISD::BITCAST &&
1410 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1411 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1412 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001413 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00001414 Arg->getOperand(0).getOperand(Element));
1415 }
1416 }
Tom Stellard365366f2013-01-23 02:09:06 +00001417 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001418
1419 case ISD::SELECT_CC: {
1420 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1421 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001422 //
1423 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1424 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001425 SDValue LHS = N->getOperand(0);
1426 if (LHS.getOpcode() != ISD::SELECT_CC) {
1427 return SDValue();
1428 }
1429
1430 SDValue RHS = N->getOperand(1);
1431 SDValue True = N->getOperand(2);
1432 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001433 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001434
1435 if (LHS.getOperand(2).getNode() != True.getNode() ||
1436 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00001437 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00001438 return SDValue();
1439 }
1440
Tom Stellard5e524892013-03-08 15:37:11 +00001441 switch (NCC) {
1442 default: return SDValue();
1443 case ISD::SETNE: return LHS;
1444 case ISD::SETEQ: {
1445 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1446 LHSCC = ISD::getSetCCInverse(LHSCC,
1447 LHS.getOperand(0).getValueType().isInteger());
Andrew Trickef9de2a2013-05-25 02:42:55 +00001448 return DAG.getSelectCC(SDLoc(N),
Tom Stellard5e524892013-03-08 15:37:11 +00001449 LHS.getOperand(0),
1450 LHS.getOperand(1),
1451 LHS.getOperand(2),
1452 LHS.getOperand(3),
1453 LHSCC);
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001454 }
Tom Stellard5e524892013-03-08 15:37:11 +00001455 }
1456 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001457 case AMDGPUISD::EXPORT: {
1458 SDValue Arg = N->getOperand(1);
1459 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1460 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001461
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001462 SDValue NewArgs[8] = {
1463 N->getOperand(0), // Chain
1464 SDValue(),
1465 N->getOperand(2), // ArrayBase
1466 N->getOperand(3), // Type
1467 N->getOperand(4), // SWZ_X
1468 N->getOperand(5), // SWZ_Y
1469 N->getOperand(6), // SWZ_Z
1470 N->getOperand(7) // SWZ_W
1471 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00001472 SDLoc DL(N);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001473 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001474 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
Tom Stellarde06163a2013-02-07 14:02:35 +00001475 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001476 case AMDGPUISD::TEXTURE_FETCH: {
1477 SDValue Arg = N->getOperand(1);
1478 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1479 break;
1480
1481 SDValue NewArgs[19] = {
1482 N->getOperand(0),
1483 N->getOperand(1),
1484 N->getOperand(2),
1485 N->getOperand(3),
1486 N->getOperand(4),
1487 N->getOperand(5),
1488 N->getOperand(6),
1489 N->getOperand(7),
1490 N->getOperand(8),
1491 N->getOperand(9),
1492 N->getOperand(10),
1493 N->getOperand(11),
1494 N->getOperand(12),
1495 N->getOperand(13),
1496 N->getOperand(14),
1497 N->getOperand(15),
1498 N->getOperand(16),
1499 N->getOperand(17),
1500 N->getOperand(18),
1501 };
1502 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
1503 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
1504 NewArgs, 19);
1505 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001506 }
1507 return SDValue();
1508}