blob: 303c0e190f6f09d11721db0b75e2581f07466092 [file] [log] [blame]
Tom Stellard75aadc22012-12-11 21:25:42 +00001//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Custom DAG lowering for R600
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
16#include "R600Defines.h"
17#include "R600InstrInfo.h"
18#include "R600MachineFunctionInfo.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000019#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000020#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000021#include "llvm/CodeGen/MachineInstrBuilder.h"
22#include "llvm/CodeGen/MachineRegisterInfo.h"
23#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000024#include "llvm/IR/Argument.h"
25#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000026
27using namespace llvm;
28
29R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000030 AMDGPUTargetLowering(TM),
31 Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000032 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
33 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
34 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
35 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
36 computeRegisterProperties();
37
38 setOperationAction(ISD::FADD, MVT::v4f32, Expand);
39 setOperationAction(ISD::FMUL, MVT::v4f32, Expand);
40 setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
41 setOperationAction(ISD::FSUB, MVT::v4f32, Expand);
42
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000043 setOperationAction(ISD::FCOS, MVT::f32, Custom);
44 setOperationAction(ISD::FSIN, MVT::f32, Custom);
45
Tom Stellard75aadc22012-12-11 21:25:42 +000046 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
47
Tom Stellard492ebea2013-03-08 15:37:07 +000048 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
49 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000050
51 setOperationAction(ISD::FSUB, MVT::f32, Expand);
52
53 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
54 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
55 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000056
Tom Stellard75aadc22012-12-11 21:25:42 +000057 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
58 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
59
Tom Stellarde8f9f282013-03-08 15:37:05 +000060 setOperationAction(ISD::SETCC, MVT::i32, Expand);
61 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000062 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
63
64 setOperationAction(ISD::SELECT, MVT::i32, Custom);
65 setOperationAction(ISD::SELECT, MVT::f32, Custom);
66
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000067 // Legalize loads and stores to the private address space.
68 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard6ec9e802013-06-20 21:55:23 +000069 setOperationAction(ISD::LOAD, MVT::v2i32, Expand);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000070 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellard1e803092013-07-23 01:48:18 +000071 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
72 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
73 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
74 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000075 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000076 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard6ec9e802013-06-20 21:55:23 +000077 setOperationAction(ISD::STORE, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000078 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
79
Tom Stellard365366f2013-01-23 02:09:06 +000080 setOperationAction(ISD::LOAD, MVT::i32, Custom);
81 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000082 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
83
Tom Stellard75aadc22012-12-11 21:25:42 +000084 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +000085 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +000086 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +000087 setTargetDAGCombine(ISD::SELECT_CC);
Quentin Colombete2e05482013-07-30 00:27:16 +000088 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +000089
Michel Danzer49812b52013-07-10 16:37:07 +000090 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
91
Tom Stellardb852af52013-03-08 15:37:03 +000092 setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard87047f62013-04-24 23:56:18 +000093 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard75aadc22012-12-11 21:25:42 +000094 setSchedulingPreference(Sched::VLIW);
95}
96
/// \brief Expand pseudo instructions that need custom MachineInstr-level
/// insertion: output/source modifier wrappers (CLAMP/FABS/FNEG), immediate
/// moves, LDS reads, RAT writes, texture derivatives (TXD), branches,
/// exports and RETURN live-out bookkeeping.
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
          static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());

  switch (MI->getOpcode()) {
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case AMDGPU::CLAMP_R600: {
    // Lower to a plain MOV carrying the clamp output modifier flag.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                   AMDGPU::MOV,
                                                   MI->getOperand(0).getReg(),
                                                   MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    // MOV with the absolute-value source modifier flag.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    // MOV with the negate source modifier flag.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    // Mark the instruction that defines the masked register so its result
    // is not written back to the register file.
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::LDS_READ_RET: {
    // LDS reads return their result in the OQAP queue register: re-emit the
    // instruction with OQAP as explicit def, then copy OQAP into the
    // original destination.
    MachineInstrBuilder NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                                        TII->get(MI->getOpcode()),
                                        AMDGPU::OQAP);
    for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
      NewMI.addOperand(MI->getOperand(i));
    }
    TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV,
                                 MI->getOperand(0).getReg(),
                                 AMDGPU::OQAP);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    // Materialize the float immediate via its raw bit pattern.
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    // Read from the constant cache: MOV from ALU_CONST with the constant
    // address encoded in the src0_sel operand.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
        MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    // If this write is immediately followed by the RETURN, fold in the
    // End-Of-Program bit.
    unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    // Texture fetch with explicit derivatives: load the H and V gradients
    // into two temporary 128-bit registers, then issue the gradient sample.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    // Adjust source swizzle and coordinate-type bits per texture target.
    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    // The sample instruction implicitly reads the gradient registers.
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    // Same as TXD above, but ends in a shadow-compare gradient sample.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    // Adjust source swizzle and coordinate-type bits per texture target.
    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    // Predicate on the float condition (non-zero == taken), then emit the
    // predicated jump.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    // Integer variant of the conditional branch lowering above.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO_INT)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if it's not the last one of its type.
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = llvm::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // CF instruction id differs between Evergreen (84) and R600 (40).
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  // All handled pseudos are replaced by the instructions emitted above.
  MI->eraseFromParent();
  return BB;
}
467
468//===----------------------------------------------------------------------===//
469// Custom DAG Lowering Operations
470//===----------------------------------------------------------------------===//
471
Tom Stellard75aadc22012-12-11 21:25:42 +0000472SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000473 MachineFunction &MF = DAG.getMachineFunction();
474 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard75aadc22012-12-11 21:25:42 +0000475 switch (Op.getOpcode()) {
476 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000477 case ISD::FCOS:
478 case ISD::FSIN: return LowerTrig(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000479 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
480 case ISD::SELECT: return LowerSELECT(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000481 case ISD::STORE: return LowerSTORE(Op, DAG);
Tom Stellard365366f2013-01-23 02:09:06 +0000482 case ISD::LOAD: return LowerLOAD(Op, DAG);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000483 case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000484 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000485 case ISD::INTRINSIC_VOID: {
486 SDValue Chain = Op.getOperand(0);
487 unsigned IntrinsicID =
488 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
489 switch (IntrinsicID) {
490 case AMDGPUIntrinsic::AMDGPU_store_output: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000491 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
492 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000493 MFI->LiveOuts.push_back(Reg);
Andrew Trickef9de2a2013-05-25 02:42:55 +0000494 return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
Tom Stellard75aadc22012-12-11 21:25:42 +0000495 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000496 case AMDGPUIntrinsic::R600_store_swizzle: {
497 const SDValue Args[8] = {
498 Chain,
499 Op.getOperand(2), // Export Value
500 Op.getOperand(3), // ArrayBase
501 Op.getOperand(4), // Type
502 DAG.getConstant(0, MVT::i32), // SWZ_X
503 DAG.getConstant(1, MVT::i32), // SWZ_Y
504 DAG.getConstant(2, MVT::i32), // SWZ_Z
505 DAG.getConstant(3, MVT::i32) // SWZ_W
506 };
Andrew Trickef9de2a2013-05-25 02:42:55 +0000507 return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(),
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000508 Args, 8);
Tom Stellard75aadc22012-12-11 21:25:42 +0000509 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000510
Tom Stellard75aadc22012-12-11 21:25:42 +0000511 // default for switch(IntrinsicID)
512 default: break;
513 }
514 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
515 break;
516 }
517 case ISD::INTRINSIC_WO_CHAIN: {
518 unsigned IntrinsicID =
519 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
520 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000521 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000522 switch(IntrinsicID) {
523 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
524 case AMDGPUIntrinsic::R600_load_input: {
525 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
526 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Vincent Lejeuned3fcb502013-05-17 16:51:06 +0000527 MachineFunction &MF = DAG.getMachineFunction();
528 MachineRegisterInfo &MRI = MF.getRegInfo();
529 MRI.addLiveIn(Reg);
530 return DAG.getCopyFromReg(DAG.getEntryNode(),
Andrew Trickef9de2a2013-05-25 02:42:55 +0000531 SDLoc(DAG.getEntryNode()), Reg, VT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000532 }
Tom Stellard41afe6a2013-02-05 17:09:14 +0000533
534 case AMDGPUIntrinsic::R600_interp_input: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000535 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Tom Stellard41afe6a2013-02-05 17:09:14 +0000536 int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
537 MachineSDNode *interp;
538 if (ijb < 0) {
Bill Wendling37e9adb2013-06-07 20:28:55 +0000539 const MachineFunction &MF = DAG.getMachineFunction();
540 const R600InstrInfo *TII =
541 static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
Tom Stellard41afe6a2013-02-05 17:09:14 +0000542 interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
543 MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
544 return DAG.getTargetExtractSubreg(
545 TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
546 DL, MVT::f32, SDValue(interp, 0));
547 }
548
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000549 MachineFunction &MF = DAG.getMachineFunction();
550 MachineRegisterInfo &MRI = MF.getRegInfo();
551 unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
552 unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
553 MRI.addLiveIn(RegisterI);
554 MRI.addLiveIn(RegisterJ);
555 SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
556 SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
557 SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
558 SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);
559
Tom Stellard41afe6a2013-02-05 17:09:14 +0000560 if (slot % 4 < 2)
561 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
562 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000563 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000564 else
565 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
566 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000567 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000568 return SDValue(interp, slot % 2);
Tom Stellard75aadc22012-12-11 21:25:42 +0000569 }
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000570 case AMDGPUIntrinsic::R600_tex:
571 case AMDGPUIntrinsic::R600_texc:
572 case AMDGPUIntrinsic::R600_txl:
573 case AMDGPUIntrinsic::R600_txlc:
574 case AMDGPUIntrinsic::R600_txb:
575 case AMDGPUIntrinsic::R600_txbc:
576 case AMDGPUIntrinsic::R600_txf:
577 case AMDGPUIntrinsic::R600_txq:
578 case AMDGPUIntrinsic::R600_ddx:
579 case AMDGPUIntrinsic::R600_ddy: {
580 unsigned TextureOp;
581 switch (IntrinsicID) {
582 case AMDGPUIntrinsic::R600_tex:
583 TextureOp = 0;
584 break;
585 case AMDGPUIntrinsic::R600_texc:
586 TextureOp = 1;
587 break;
588 case AMDGPUIntrinsic::R600_txl:
589 TextureOp = 2;
590 break;
591 case AMDGPUIntrinsic::R600_txlc:
592 TextureOp = 3;
593 break;
594 case AMDGPUIntrinsic::R600_txb:
595 TextureOp = 4;
596 break;
597 case AMDGPUIntrinsic::R600_txbc:
598 TextureOp = 5;
599 break;
600 case AMDGPUIntrinsic::R600_txf:
601 TextureOp = 6;
602 break;
603 case AMDGPUIntrinsic::R600_txq:
604 TextureOp = 7;
605 break;
606 case AMDGPUIntrinsic::R600_ddx:
607 TextureOp = 8;
608 break;
609 case AMDGPUIntrinsic::R600_ddy:
610 TextureOp = 9;
611 break;
612 default:
613 llvm_unreachable("Unknow Texture Operation");
614 }
615
616 SDValue TexArgs[19] = {
617 DAG.getConstant(TextureOp, MVT::i32),
618 Op.getOperand(1),
619 DAG.getConstant(0, MVT::i32),
620 DAG.getConstant(1, MVT::i32),
621 DAG.getConstant(2, MVT::i32),
622 DAG.getConstant(3, MVT::i32),
623 Op.getOperand(2),
624 Op.getOperand(3),
625 Op.getOperand(4),
626 DAG.getConstant(0, MVT::i32),
627 DAG.getConstant(1, MVT::i32),
628 DAG.getConstant(2, MVT::i32),
629 DAG.getConstant(3, MVT::i32),
630 Op.getOperand(5),
631 Op.getOperand(6),
632 Op.getOperand(7),
633 Op.getOperand(8),
634 Op.getOperand(9),
635 Op.getOperand(10)
636 };
637 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
638 }
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000639 case AMDGPUIntrinsic::AMDGPU_dp4: {
640 SDValue Args[8] = {
641 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
642 DAG.getConstant(0, MVT::i32)),
643 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
644 DAG.getConstant(0, MVT::i32)),
645 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
646 DAG.getConstant(1, MVT::i32)),
647 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
648 DAG.getConstant(1, MVT::i32)),
649 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
650 DAG.getConstant(2, MVT::i32)),
651 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
652 DAG.getConstant(2, MVT::i32)),
653 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
654 DAG.getConstant(3, MVT::i32)),
655 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
656 DAG.getConstant(3, MVT::i32))
657 };
658 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8);
659 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000660
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000661 case Intrinsic::r600_read_ngroups_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000662 return LowerImplicitParameter(DAG, VT, DL, 0);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000663 case Intrinsic::r600_read_ngroups_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000664 return LowerImplicitParameter(DAG, VT, DL, 1);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000665 case Intrinsic::r600_read_ngroups_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000666 return LowerImplicitParameter(DAG, VT, DL, 2);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000667 case Intrinsic::r600_read_global_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000668 return LowerImplicitParameter(DAG, VT, DL, 3);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000669 case Intrinsic::r600_read_global_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000670 return LowerImplicitParameter(DAG, VT, DL, 4);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000671 case Intrinsic::r600_read_global_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000672 return LowerImplicitParameter(DAG, VT, DL, 5);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000673 case Intrinsic::r600_read_local_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000674 return LowerImplicitParameter(DAG, VT, DL, 6);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000675 case Intrinsic::r600_read_local_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000676 return LowerImplicitParameter(DAG, VT, DL, 7);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000677 case Intrinsic::r600_read_local_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000678 return LowerImplicitParameter(DAG, VT, DL, 8);
679
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000680 case Intrinsic::r600_read_tgid_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000681 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
682 AMDGPU::T1_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000683 case Intrinsic::r600_read_tgid_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000684 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
685 AMDGPU::T1_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000686 case Intrinsic::r600_read_tgid_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000687 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
688 AMDGPU::T1_Z, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000689 case Intrinsic::r600_read_tidig_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000690 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
691 AMDGPU::T0_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000692 case Intrinsic::r600_read_tidig_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000693 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
694 AMDGPU::T0_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000695 case Intrinsic::r600_read_tidig_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000696 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
697 AMDGPU::T0_Z, VT);
698 }
699 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
700 break;
701 }
702 } // end switch(Op.getOpcode())
703 return SDValue();
704}
705
706void R600TargetLowering::ReplaceNodeResults(SDNode *N,
707 SmallVectorImpl<SDValue> &Results,
708 SelectionDAG &DAG) const {
709 switch (N->getOpcode()) {
710 default: return;
711 case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
Tom Stellard365366f2013-01-23 02:09:06 +0000712 return;
713 case ISD::LOAD: {
714 SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
715 Results.push_back(SDValue(Node, 0));
716 Results.push_back(SDValue(Node, 1));
717 // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
718 // function
719 DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
720 return;
721 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000722 case ISD::STORE:
723 SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
724 Results.push_back(SDValue(Node, 0));
725 return;
Tom Stellard75aadc22012-12-11 21:25:42 +0000726 }
727}
728
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000729SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
730 // On hw >= R700, COS/SIN input must be between -1. and 1.
731 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
732 EVT VT = Op.getValueType();
733 SDValue Arg = Op.getOperand(0);
734 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
735 DAG.getNode(ISD::FADD, SDLoc(Op), VT,
736 DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
737 DAG.getConstantFP(0.15915494309, MVT::f32)),
738 DAG.getConstantFP(0.5, MVT::f32)));
739 unsigned TrigNode;
740 switch (Op.getOpcode()) {
741 case ISD::FCOS:
742 TrigNode = AMDGPUISD::COS_HW;
743 break;
744 case ISD::FSIN:
745 TrigNode = AMDGPUISD::SIN_HW;
746 break;
747 default:
748 llvm_unreachable("Wrong trig opcode");
749 }
750 SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
751 DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
752 DAG.getConstantFP(-0.5, MVT::f32)));
753 if (Gen >= AMDGPUSubtarget::R700)
754 return TrigVal;
755 // On R600 hw, COS/SIN input must be between -Pi and Pi.
756 return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
757 DAG.getConstantFP(3.14159265359, MVT::f32));
758}
759
Tom Stellard75aadc22012-12-11 21:25:42 +0000760SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
761 return DAG.getNode(
762 ISD::SETCC,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000763 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +0000764 MVT::i1,
765 Op, DAG.getConstantFP(0.0f, MVT::f32),
766 DAG.getCondCode(ISD::SETNE)
767 );
768}
769
Tom Stellard75aadc22012-12-11 21:25:42 +0000770SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000771 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +0000772 unsigned DwordOffset) const {
773 unsigned ByteOffset = DwordOffset * 4;
774 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +0000775 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +0000776
777 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
778 assert(isInt<16>(ByteOffset));
779
780 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
781 DAG.getConstant(ByteOffset, MVT::i32), // PTR
782 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
783 false, false, false, 0);
784}
785
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000786SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
787
788 MachineFunction &MF = DAG.getMachineFunction();
789 const AMDGPUFrameLowering *TFL =
790 static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
791
792 FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
793 assert(FIN);
794
795 unsigned FrameIndex = FIN->getIndex();
796 unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
797 return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
798}
799
Tom Stellard75aadc22012-12-11 21:25:42 +0000800bool R600TargetLowering::isZero(SDValue Op) const {
801 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
802 return Cst->isNullValue();
803 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
804 return CstFP->isZero();
805 } else {
806 return false;
807 }
808}
809
/// Custom lowering for SELECT_CC.
///
/// Tries, in order: (1) canonicalize so a SET* instruction can match,
/// (2) rewrite zero-compares into a CND*-matchable form, (3) recognize a
/// min/max pattern, and finally (4) expand into two natively supported
/// SELECT_CC nodes.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_any
  // select_cc f32, f32, 1.0f, 0.0f, cc_any
  // select_cc i32, i32, -1, 0, cc_any
  //

  // Move hardware True/False values to the correct operand.
  // If True is the hardware "false" value and False the hardware "true"
  // value, swap them and invert the condition so the SET* check below fires.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    std::swap(False, True);
    CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_any
  // select_cc f32, 0.0, i32, i32, cc_any
  // select_cc i32, 0, f32, f32, cc_any
  // select_cc i32, 0, i32, i32, cc_any
  //
  if (isZero(LHS) || isZero(RHS)) {
    SDValue Cond = (isZero(LHS) ? RHS : LHS);
    SDValue Zero = (isZero(LHS) ? LHS : RHS);
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types.  This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }
    // If the zero is on the left, swap the operand order implied by the
    // condition so the comparison is always "Cond <cc> 0".
    if (isZero(LHS)) {
      CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
    }

    // CND* only implements the "greater/equal" style conditions, so invert
    // "not equal" and "less" style conditions and swap True/False.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
    case ISD::SETULE:
    case ISD::SETULT:
    case ISD::SETOLE:
    case ISD::SETOLT:
    case ISD::SETLE:
    case ISD::SETLT:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }


  // Possible Min/Max pattern
  SDValue MinMax = LowerMinMax(Op, DAG);
  if (MinMax.getNode()) {
    return MinMax;
  }

  // If we make it this far, it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  }
  else {
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations: the first materializes the comparison result as a
  // hardware true/false value, the second selects on that result.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
929
930SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
931 return DAG.getNode(ISD::SELECT_CC,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000932 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +0000933 Op.getValueType(),
934 Op.getOperand(0),
935 DAG.getConstant(0, MVT::i32),
936 Op.getOperand(1),
937 Op.getOperand(2),
938 DAG.getCondCode(ISD::SETNE));
939}
940
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000941/// LLVM generates byte-addresed pointers. For indirect addressing, we need to
942/// convert these pointers to a register index. Each register holds
943/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
944/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
945/// for indirect addressing.
946SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
947 unsigned StackWidth,
948 SelectionDAG &DAG) const {
949 unsigned SRLPad;
950 switch(StackWidth) {
951 case 1:
952 SRLPad = 2;
953 break;
954 case 2:
955 SRLPad = 3;
956 break;
957 case 4:
958 SRLPad = 4;
959 break;
960 default: llvm_unreachable("Invalid stack width");
961 }
962
Andrew Trickef9de2a2013-05-25 02:42:55 +0000963 return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000964 DAG.getConstant(SRLPad, MVT::i32));
965}
966
967void R600TargetLowering::getStackAddress(unsigned StackWidth,
968 unsigned ElemIdx,
969 unsigned &Channel,
970 unsigned &PtrIncr) const {
971 switch (StackWidth) {
972 default:
973 case 1:
974 Channel = 0;
975 if (ElemIdx > 0) {
976 PtrIncr = 1;
977 } else {
978 PtrIncr = 0;
979 }
980 break;
981 case 2:
982 Channel = ElemIdx % 2;
983 if (ElemIdx == 2) {
984 PtrIncr = 1;
985 } else {
986 PtrIncr = 0;
987 }
988 break;
989 case 4:
990 Channel = ElemIdx;
991 PtrIncr = 0;
992 break;
993 }
994}
995
/// Custom lowering for STORE nodes.
///
/// Global-address-space stores have their pointer converted from a byte
/// address to a dword address (DWORDADDR).  Private-address-space (stack)
/// stores are lowered to AMDGPUISD::REGISTER_STORE nodes, one per vector
/// element.  Stores to any other address space are left for the default
/// legalizer (SDValue() is returned).
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Ptr = Op.getOperand(2);

  if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
      Ptr->getOpcode() != AMDGPUISD::DWORDADDR) {
    // Convert pointer from byte address to dword address.
    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
                      DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
                                  Ptr, DAG.getConstant(2, MVT::i32)));

    if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
      assert(!"Truncated and indexed stores not supported yet");
    } else {
      Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
    }
    return Chain;
  }

  EVT ValueVT = Value.getValueType();

  if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing

  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  // Convert the byte-addressed pointer to a register index.
  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (ValueVT.isVector()) {
    unsigned NumElemVT = ValueVT.getVectorNumElements();
    EVT ElemVT = ValueVT.getVectorElementType();
    SDValue Stores[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    // Emit one REGISTER_STORE per element.  getStackAddress() yields a
    // *relative* pointer increment, accumulated into Ptr on each iteration.
    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
                                 Value, DAG.getConstant(i, MVT::i32));

      Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
                              Chain, Elem, Ptr,
                              DAG.getTargetConstant(Channel, MVT::i32));
    }
    // Tie the per-element stores together into a single chain result.
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
  } else {
    // i8 values are widened to i32 before being stored.
    if (ValueVT == MVT::i8) {
      Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
    }
    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
    DAG.getTargetConstant(0, MVT::i32)); // Channel
  }

  return Chain;
}
1064
Tom Stellard365366f2013-01-23 02:09:06 +00001065// return (512 + (kc_bank << 12)
1066static int
1067ConstantAddressBlock(unsigned AddressSpace) {
1068 switch (AddressSpace) {
1069 case AMDGPUAS::CONSTANT_BUFFER_0:
1070 return 512;
1071 case AMDGPUAS::CONSTANT_BUFFER_1:
1072 return 512 + 4096;
1073 case AMDGPUAS::CONSTANT_BUFFER_2:
1074 return 512 + 4096 * 2;
1075 case AMDGPUAS::CONSTANT_BUFFER_3:
1076 return 512 + 4096 * 3;
1077 case AMDGPUAS::CONSTANT_BUFFER_4:
1078 return 512 + 4096 * 4;
1079 case AMDGPUAS::CONSTANT_BUFFER_5:
1080 return 512 + 4096 * 5;
1081 case AMDGPUAS::CONSTANT_BUFFER_6:
1082 return 512 + 4096 * 6;
1083 case AMDGPUAS::CONSTANT_BUFFER_7:
1084 return 512 + 4096 * 7;
1085 case AMDGPUAS::CONSTANT_BUFFER_8:
1086 return 512 + 4096 * 8;
1087 case AMDGPUAS::CONSTANT_BUFFER_9:
1088 return 512 + 4096 * 9;
1089 case AMDGPUAS::CONSTANT_BUFFER_10:
1090 return 512 + 4096 * 10;
1091 case AMDGPUAS::CONSTANT_BUFFER_11:
1092 return 512 + 4096 * 11;
1093 case AMDGPUAS::CONSTANT_BUFFER_12:
1094 return 512 + 4096 * 12;
1095 case AMDGPUAS::CONSTANT_BUFFER_13:
1096 return 512 + 4096 * 13;
1097 case AMDGPUAS::CONSTANT_BUFFER_14:
1098 return 512 + 4096 * 14;
1099 case AMDGPUAS::CONSTANT_BUFFER_15:
1100 return 512 + 4096 * 15;
1101 default:
1102 return -1;
1103 }
1104}
1105
/// Custom lowering for LOAD nodes.
///
/// Constant-buffer loads become AMDGPUISD::CONST_ADDRESS nodes (folded to an
/// immediate kcache address when the pointer is constant).  SEXT loads are
/// expanded to an extload plus shl/sra.  Private-address-space (stack) loads
/// become AMDGPUISD::REGISTER_LOAD nodes.  Anything else returns SDValue()
/// for default handling.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1) {
    SDValue Result;
    // A pointer that is constant (or derived from a constant) can be folded
    // into an immediate kcache address at ISel time.
    if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
        dyn_cast<Constant>(LoadNode->getSrcValue()) ||
        dyn_cast<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Slots, 4);
    } else {
      // non constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    // Scalar loads only need element 0 of the v4i32 result.
    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
          DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so
  // we need to manually expand loads that may be legal in some address spaces
  // and illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported
  // for compute shaders, since the data is sign extended when it is uploaded
  // to the buffer. However SEXT loads from other addresspaces are not
  // supported, so we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    // Expand the SEXT load as: (sra (shl (extload x), amt), amt), shifting
    // the loaded value up and back down to replicate the sign bit.
    SDValue ShiftAmount =
          DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                  LoadNode->getPointerInfo(), MemVT,
                                  LoadNode->isVolatile(),
                                  LoadNode->isNonTemporal(),
                                  LoadNode->getAlignment());
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);

    SDValue MergedValues[2] = { Sra, Chain };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  // Convert the byte-addressed pointer to a register index.
  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    // Emit one REGISTER_LOAD per element.  getStackAddress() yields a
    // *relative* pointer increment, accumulated into Ptr on each iteration.
    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, MVT::i32),
                             Op.getOperand(2));
    }
    // Pad the result out to a 4-element vector with undef lanes.
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2];
  Ops[0] = LoweredLoad;
  Ops[1] = Chain;

  return DAG.getMergeValues(Ops, 2, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001226
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
///
/// Each formal argument is materialized as a load from CONSTANT_BUFFER_0 at
/// offset 36 + its assigned location offset (the first 36 bytes hold
/// dispatch information, see below).
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      SDLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  // Run the calling-convention analysis to assign a buffer offset to each
  // incoming argument.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());

  AnalyzeFormalArguments(CCInfo, Ins);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    EVT VT = VA.getLocVT();

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.
    SDValue Arg = DAG.getLoad(VT, DL, Chain,
                           DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
                           MachinePointerInfo(UndefValue::get(PtrTy)), false,
                           false, false, 4); // 4 is the preferred alignment for
                                             // the CONSTANT memory space.
    InVals.push_back(Arg);
  }
  return Chain;
}
1261
Matt Arsenault758659232013-05-18 00:21:46 +00001262EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Tom Stellard75aadc22012-12-11 21:25:42 +00001263 if (!VT.isVector()) return MVT::i32;
1264 return VT.changeVectorElementTypeToInteger();
1265}
1266
/// Compact a 4-element BUILD_VECTOR for swizzling.
///
/// Elements that are the constants 0.0 / 1.0 are replaced with undef and
/// recorded in \p RemapSwizzle as the hardware swizzle selects SEL_0 (4) /
/// SEL_1 (5).  An element equal to an earlier element is likewise replaced
/// with undef and remapped to the earlier lane.  \p RemapSwizzle maps old
/// lane index -> new swizzle value; lanes that are unchanged get no entry.
static SDValue
CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
                        DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
      VectorEntry.getOperand(0),
      VectorEntry.getOperand(1),
      VectorEntry.getOperand(2),
      VectorEntry.getOperand(3)
  };

  for (unsigned i = 0; i < 4; i++) {
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    // Undef lanes (including those just remapped above) are not candidates
    // for duplicate merging.
    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      continue;
    // Merge this lane with any identical earlier lane.
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
      VectorEntry.getValueType(), NewBldVec, 4);
}
1304
/// Permute a 4-element BUILD_VECTOR so that elements built from
/// EXTRACT_VECTOR_ELT land in the lane matching their source index, recording
/// the resulting old -> new lane permutation in \p RemapSwizzle (initialized
/// to the identity).
static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
                                DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
      VectorEntry.getOperand(0),
      VectorEntry.getOperand(1),
      VectorEntry.getOperand(2),
      VectorEntry.getOperand(3)
  };
  // A lane becomes unmovable once an extract element has been placed there.
  bool isUnmovable[4] = { false, false, false, false };
  for (unsigned i = 0; i < 4; i++)
    RemapSwizzle[i] = i;

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
          ->getZExtValue();
      if (!isUnmovable[Idx]) {
        // Swap i and Idx
        // NOTE(review): the double indexing below (RemapSwizzle[RemapSwizzle[...]])
        // looks suspicious — verify it tracks the permutation correctly when
        // a lane has already been swapped once; later revisions of this
        // function restructured this logic.
        std::swap(NewBldVec[Idx], NewBldVec[i]);
        std::swap(RemapSwizzle[RemapSwizzle[Idx]], RemapSwizzle[RemapSwizzle[i]]);
      }
      isUnmovable[Idx] = true;
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
      VectorEntry.getValueType(), NewBldVec, 4);
}
1335
1336
1337SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1338SDValue Swz[4], SelectionDAG &DAG) const {
1339 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1340 // Old -> New swizzle values
1341 DenseMap<unsigned, unsigned> SwizzleRemap;
1342
1343 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1344 for (unsigned i = 0; i < 4; i++) {
1345 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1346 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1347 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1348 }
1349
1350 SwizzleRemap.clear();
1351 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1352 for (unsigned i = 0; i < 4; i++) {
1353 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1354 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1355 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1356 }
1357
1358 return BuildVector;
1359}
1360
1361
Tom Stellard75aadc22012-12-11 21:25:42 +00001362//===----------------------------------------------------------------------===//
1363// Custom DAG Optimizations
1364//===----------------------------------------------------------------------===//
1365
1366SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1367 DAGCombinerInfo &DCI) const {
1368 SelectionDAG &DAG = DCI.DAG;
1369
1370 switch (N->getOpcode()) {
1371 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1372 case ISD::FP_ROUND: {
1373 SDValue Arg = N->getOperand(0);
1374 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001375 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001376 Arg.getOperand(0));
1377 }
1378 break;
1379 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001380
1381 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1382 // (i32 select_cc f32, f32, -1, 0 cc)
1383 //
1384 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1385 // this to one of the SET*_DX10 instructions.
1386 case ISD::FP_TO_SINT: {
1387 SDValue FNeg = N->getOperand(0);
1388 if (FNeg.getOpcode() != ISD::FNEG) {
1389 return SDValue();
1390 }
1391 SDValue SelectCC = FNeg.getOperand(0);
1392 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1393 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1394 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1395 !isHWTrueValue(SelectCC.getOperand(2)) ||
1396 !isHWFalseValue(SelectCC.getOperand(3))) {
1397 return SDValue();
1398 }
1399
Andrew Trickef9de2a2013-05-25 02:42:55 +00001400 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001401 SelectCC.getOperand(0), // LHS
1402 SelectCC.getOperand(1), // RHS
1403 DAG.getConstant(-1, MVT::i32), // True
1404 DAG.getConstant(0, MVT::i32), // Flase
1405 SelectCC.getOperand(4)); // CC
1406
1407 break;
1408 }
Quentin Colombete2e05482013-07-30 00:27:16 +00001409
1410 // insert_vector_elt (build_vector elt0, …, eltN), NewEltIdx, idx
1411 // => build_vector elt0, …, NewEltIdx, …, eltN
1412 case ISD::INSERT_VECTOR_ELT: {
1413 SDValue InVec = N->getOperand(0);
1414 SDValue InVal = N->getOperand(1);
1415 SDValue EltNo = N->getOperand(2);
1416 SDLoc dl(N);
1417
1418 // If the inserted element is an UNDEF, just use the input vector.
1419 if (InVal.getOpcode() == ISD::UNDEF)
1420 return InVec;
1421
1422 EVT VT = InVec.getValueType();
1423
1424 // If we can't generate a legal BUILD_VECTOR, exit
1425 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1426 return SDValue();
1427
1428 // Check that we know which element is being inserted
1429 if (!isa<ConstantSDNode>(EltNo))
1430 return SDValue();
1431 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1432
1433 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1434 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1435 // vector elements.
1436 SmallVector<SDValue, 8> Ops;
1437 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1438 Ops.append(InVec.getNode()->op_begin(),
1439 InVec.getNode()->op_end());
1440 } else if (InVec.getOpcode() == ISD::UNDEF) {
1441 unsigned NElts = VT.getVectorNumElements();
1442 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1443 } else {
1444 return SDValue();
1445 }
1446
1447 // Insert the element
1448 if (Elt < Ops.size()) {
1449 // All the operands of BUILD_VECTOR must have the same type;
1450 // we enforce that here.
1451 EVT OpVT = Ops[0].getValueType();
1452 if (InVal.getValueType() != OpVT)
1453 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1454 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
1455 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
1456 Ops[Elt] = InVal;
1457 }
1458
1459 // Return the new vector
1460 return DAG.getNode(ISD::BUILD_VECTOR, dl,
1461 VT, &Ops[0], Ops.size());
1462 }
1463
Tom Stellard365366f2013-01-23 02:09:06 +00001464 // Extract_vec (Build_vector) generated by custom lowering
1465 // also needs to be customly combined
1466 case ISD::EXTRACT_VECTOR_ELT: {
1467 SDValue Arg = N->getOperand(0);
1468 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1469 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1470 unsigned Element = Const->getZExtValue();
1471 return Arg->getOperand(Element);
1472 }
1473 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001474 if (Arg.getOpcode() == ISD::BITCAST &&
1475 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1476 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1477 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001478 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00001479 Arg->getOperand(0).getOperand(Element));
1480 }
1481 }
Tom Stellard365366f2013-01-23 02:09:06 +00001482 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001483
1484 case ISD::SELECT_CC: {
1485 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1486 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001487 //
1488 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1489 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001490 SDValue LHS = N->getOperand(0);
1491 if (LHS.getOpcode() != ISD::SELECT_CC) {
1492 return SDValue();
1493 }
1494
1495 SDValue RHS = N->getOperand(1);
1496 SDValue True = N->getOperand(2);
1497 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001498 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001499
1500 if (LHS.getOperand(2).getNode() != True.getNode() ||
1501 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00001502 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00001503 return SDValue();
1504 }
1505
Tom Stellard5e524892013-03-08 15:37:11 +00001506 switch (NCC) {
1507 default: return SDValue();
1508 case ISD::SETNE: return LHS;
1509 case ISD::SETEQ: {
1510 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1511 LHSCC = ISD::getSetCCInverse(LHSCC,
1512 LHS.getOperand(0).getValueType().isInteger());
Andrew Trickef9de2a2013-05-25 02:42:55 +00001513 return DAG.getSelectCC(SDLoc(N),
Tom Stellard5e524892013-03-08 15:37:11 +00001514 LHS.getOperand(0),
1515 LHS.getOperand(1),
1516 LHS.getOperand(2),
1517 LHS.getOperand(3),
1518 LHSCC);
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001519 }
Tom Stellard5e524892013-03-08 15:37:11 +00001520 }
1521 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001522 case AMDGPUISD::EXPORT: {
1523 SDValue Arg = N->getOperand(1);
1524 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1525 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001526
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001527 SDValue NewArgs[8] = {
1528 N->getOperand(0), // Chain
1529 SDValue(),
1530 N->getOperand(2), // ArrayBase
1531 N->getOperand(3), // Type
1532 N->getOperand(4), // SWZ_X
1533 N->getOperand(5), // SWZ_Y
1534 N->getOperand(6), // SWZ_Z
1535 N->getOperand(7) // SWZ_W
1536 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00001537 SDLoc DL(N);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001538 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001539 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
Tom Stellarde06163a2013-02-07 14:02:35 +00001540 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001541 case AMDGPUISD::TEXTURE_FETCH: {
1542 SDValue Arg = N->getOperand(1);
1543 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1544 break;
1545
1546 SDValue NewArgs[19] = {
1547 N->getOperand(0),
1548 N->getOperand(1),
1549 N->getOperand(2),
1550 N->getOperand(3),
1551 N->getOperand(4),
1552 N->getOperand(5),
1553 N->getOperand(6),
1554 N->getOperand(7),
1555 N->getOperand(8),
1556 N->getOperand(9),
1557 N->getOperand(10),
1558 N->getOperand(11),
1559 N->getOperand(12),
1560 N->getOperand(13),
1561 N->getOperand(14),
1562 N->getOperand(15),
1563 N->getOperand(16),
1564 N->getOperand(17),
1565 N->getOperand(18),
1566 };
1567 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
1568 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
1569 NewArgs, 19);
1570 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001571 }
1572 return SDValue();
1573}