blob: da06dfedc3ee273aa42ddcf1977f560539e4e25f [file] [log] [blame]
Tom Stellard75aadc22012-12-11 21:25:42 +00001//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Custom DAG lowering for R600
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
16#include "R600Defines.h"
17#include "R600InstrInfo.h"
18#include "R600MachineFunctionInfo.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000019#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000020#include "llvm/CodeGen/MachineInstrBuilder.h"
21#include "llvm/CodeGen/MachineRegisterInfo.h"
22#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000023#include "llvm/IR/Argument.h"
24#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000025
26using namespace llvm;
27
28R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
29 AMDGPUTargetLowering(TM),
30 TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo())) {
Tom Stellard75aadc22012-12-11 21:25:42 +000031 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
32 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
33 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
34 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
35 computeRegisterProperties();
36
37 setOperationAction(ISD::FADD, MVT::v4f32, Expand);
38 setOperationAction(ISD::FMUL, MVT::v4f32, Expand);
39 setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
40 setOperationAction(ISD::FSUB, MVT::v4f32, Expand);
41
42 setOperationAction(ISD::ADD, MVT::v4i32, Expand);
43 setOperationAction(ISD::AND, MVT::v4i32, Expand);
Tom Stellarda8b03512012-12-21 16:33:24 +000044 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
45 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
Tom Stellard3deddc52013-05-10 02:09:34 +000046 setOperationAction(ISD::MUL, MVT::v2i32, Expand);
47 setOperationAction(ISD::MUL, MVT::v4i32, Expand);
Tom Stellard4489b852013-05-03 17:21:31 +000048 setOperationAction(ISD::OR, MVT::v4i32, Expand);
49 setOperationAction(ISD::OR, MVT::v2i32, Expand);
Tom Stellarda8b03512012-12-21 16:33:24 +000050 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
Tom Stellard4489b852013-05-03 17:21:31 +000051 setOperationAction(ISD::SHL, MVT::v4i32, Expand);
52 setOperationAction(ISD::SHL, MVT::v2i32, Expand);
53 setOperationAction(ISD::SRL, MVT::v4i32, Expand);
54 setOperationAction(ISD::SRL, MVT::v2i32, Expand);
Tom Stellard7fb39632013-05-10 02:09:29 +000055 setOperationAction(ISD::SRA, MVT::v4i32, Expand);
56 setOperationAction(ISD::SRA, MVT::v2i32, Expand);
Tom Stellard3a7c34c2013-05-10 02:09:39 +000057 setOperationAction(ISD::SUB, MVT::v4i32, Expand);
58 setOperationAction(ISD::SUB, MVT::v2i32, Expand);
Tom Stellarda8b03512012-12-21 16:33:24 +000059 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000060 setOperationAction(ISD::UDIV, MVT::v4i32, Expand);
61 setOperationAction(ISD::UREM, MVT::v4i32, Expand);
62 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard4489b852013-05-03 17:21:31 +000063 setOperationAction(ISD::XOR, MVT::v4i32, Expand);
64 setOperationAction(ISD::XOR, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000065
Tom Stellard492ebea2013-03-08 15:37:07 +000066 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
67 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000068
69 setOperationAction(ISD::FSUB, MVT::f32, Expand);
70
71 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
72 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
73 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000074
75 setOperationAction(ISD::ROTL, MVT::i32, Custom);
76
77 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
78 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
79
Tom Stellarde8f9f282013-03-08 15:37:05 +000080 setOperationAction(ISD::SETCC, MVT::i32, Expand);
81 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000082 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
83
84 setOperationAction(ISD::SELECT, MVT::i32, Custom);
85 setOperationAction(ISD::SELECT, MVT::f32, Custom);
86
Tom Stellarda99c6ae2013-05-10 02:09:24 +000087 setOperationAction(ISD::VSELECT, MVT::v4i32, Expand);
88 setOperationAction(ISD::VSELECT, MVT::v2i32, Expand);
89
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000090 // Legalize loads and stores to the private address space.
91 setOperationAction(ISD::LOAD, MVT::i32, Custom);
92 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
93 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
94 setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Custom);
95 setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
96 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
97 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Custom);
98 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000099 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000100 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000101 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
102
Tom Stellard365366f2013-01-23 02:09:06 +0000103 setOperationAction(ISD::LOAD, MVT::i32, Custom);
104 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000105 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
106
Tom Stellard75aadc22012-12-11 21:25:42 +0000107 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +0000108 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +0000109 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +0000110 setTargetDAGCombine(ISD::SELECT_CC);
Tom Stellard75aadc22012-12-11 21:25:42 +0000111
Tom Stellardb852af52013-03-08 15:37:03 +0000112 setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard87047f62013-04-24 23:56:18 +0000113 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard75aadc22012-12-11 21:25:42 +0000114 setSchedulingPreference(Sched::VLIW);
115}
116
/// \brief Expand R600 pseudo instructions that need custom MachineInstr
/// insertion.
///
/// Each case emits the real machine instruction(s) for one pseudo and then
/// the pseudo is erased at the bottom of the function.  Cases that must keep
/// the original instruction (EG_ExportSwz/R600_ExportSwz when unmodified, and
/// RETURN) return BB early instead.  Unknown opcodes are delegated to the
/// AMDGPU base class.
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;

  switch (MI->getOpcode()) {
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case AMDGPU::CLAMP_R600: {
    // CLAMP is a MOV with the clamp modifier bit set on the result.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                   AMDGPU::MOV,
                                                   MI->getOperand(0).getReg(),
                                                   MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    // FABS is a MOV with the absolute-value modifier on the source.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    // FNEG is a MOV with the negate modifier on the source.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    // Rather than emitting anything, set the write-mask flag on the
    // instruction that defines the masked register.
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    // Materialize an f32 immediate via its raw bit pattern.
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    // Copy out of the constant buffer: a MOV from ALU_CONST with the
    // constant-buffer selector stored in the SRC0_SEL immediate operand.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, R600Operands::SRC0_SEL,
        MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    // If the next instruction is a RETURN, fold the end-of-program bit into
    // this export-style write.
    unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    // Texture sample with user-provided derivatives: load the H and V
    // gradients into temporaries, then issue the gradient sample that
    // implicitly reads them.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    // Adjust source swizzle and coordinate-type bits per texture target.
    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            // The gradient registers are read implicitly by the sample.
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    // Same expansion as TXD, but the final sample is the shadow-compare
    // variant (TEX_SAMPLE_C_G).
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    // Unconditional branch maps directly to JUMP.
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    // Materialize the f32 condition into the predicate bit (with the PUSH
    // flag for the hardware control-flow stack), then branch on it.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    // Integer variant of BRANCH_COND_f32 (uses the integer compare opcode).
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
            AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO_INT)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
           .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if its not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = llvm::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // CF_INST encoding differs between Evergreen (84) and R600 (40).
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  // The pseudo has been fully expanded; remove it.
  MI->eraseFromParent();
  return BB;
}
472
473//===----------------------------------------------------------------------===//
474// Custom DAG Lowering Operations
475//===----------------------------------------------------------------------===//
476
477using namespace llvm::Intrinsic;
478using namespace llvm::AMDGPUIntrinsic;
479
/// \brief Custom-lower the operations the constructor marked Custom.
///
/// Simple opcodes dispatch to dedicated Lower* helpers; the two intrinsic
/// opcodes are decoded inline.  Returns an empty SDValue when an intrinsic
/// case only falls through (legalization keeps the original node).
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::ROTL: return LowerROTL(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::SELECT: return LowerSELECT(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: return LowerLOAD(Op, DAG);
  case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      MachineFunction &MF = DAG.getMachineFunction();
      R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      // Record the register so the RETURN expansion can mark it as an
      // implicit use (see EmitInstrWithCustomInserter).
      MFI->LiveOuts.push_back(Reg);
      return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
    }
    case AMDGPUIntrinsic::R600_store_swizzle: {
      // Build an EXPORT node with the identity swizzle (X,Y,Z,W).
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, MVT::i32), // SWZ_X
        DAG.getConstant(1, MVT::i32), // SWZ_Y
        DAG.getConstant(2, MVT::i32), // SWZ_Z
        DAG.getConstant(3, MVT::i32) // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::EXPORT, Op.getDebugLoc(), Op.getValueType(),
          Args, 8);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    DebugLoc DL = Op.getDebugLoc();
    switch(IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      // Shader inputs live in fixed T-registers; mark the register live-in
      // and read it at function entry.
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      MRI.addLiveIn(Reg);
      return DAG.getCopyFromReg(DAG.getEntryNode(),
          DAG.getEntryNode().getDebugLoc(), Reg, VT);
    }

    case AMDGPUIntrinsic::R600_interp_input: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
      MachineSDNode *interp;
      if (ijb < 0) {
        // Negative barycentric index: flat (non-interpolated) input; load
        // the vector and extract the requested channel.
        interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
            MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
        return DAG.getTargetExtractSubreg(
            TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
            DL, MVT::f32, SDValue(interp, 0));
      }

      // INTERP_PAIR_* produce two f32 results (XY or ZW); pick the pair by
      // channel, feed it the I/J barycentric registers, and select the
      // matching result below.
      if (slot % 4 < 2)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1), MVT::f32),
            CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb), MVT::f32));
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1), MVT::f32),
            CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb), MVT::f32));

      return SDValue(interp, slot % 2);
    }
    case AMDGPUIntrinsic::R600_tex:
    case AMDGPUIntrinsic::R600_texc:
    case AMDGPUIntrinsic::R600_txl:
    case AMDGPUIntrinsic::R600_txlc:
    case AMDGPUIntrinsic::R600_txb:
    case AMDGPUIntrinsic::R600_txbc:
    case AMDGPUIntrinsic::R600_txf:
    case AMDGPUIntrinsic::R600_txq:
    case AMDGPUIntrinsic::R600_ddx:
    case AMDGPUIntrinsic::R600_ddy: {
      // Map each texture intrinsic to the TEXTURE_FETCH opcode selector.
      unsigned TextureOp;
      switch (IntrinsicID) {
      case AMDGPUIntrinsic::R600_tex:
        TextureOp = 0;
        break;
      case AMDGPUIntrinsic::R600_texc:
        TextureOp = 1;
        break;
      case AMDGPUIntrinsic::R600_txl:
        TextureOp = 2;
        break;
      case AMDGPUIntrinsic::R600_txlc:
        TextureOp = 3;
        break;
      case AMDGPUIntrinsic::R600_txb:
        TextureOp = 4;
        break;
      case AMDGPUIntrinsic::R600_txbc:
        TextureOp = 5;
        break;
      case AMDGPUIntrinsic::R600_txf:
        TextureOp = 6;
        break;
      case AMDGPUIntrinsic::R600_txq:
        TextureOp = 7;
        break;
      case AMDGPUIntrinsic::R600_ddx:
        TextureOp = 8;
        break;
      case AMDGPUIntrinsic::R600_ddy:
        TextureOp = 9;
        break;
      default:
        llvm_unreachable("Unknow Texture Operation");
      }

      // TEXTURE_FETCH operands: op selector, coordinate, source/dest
      // swizzles (identity), resource/sampler ids and texture-target info.
      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
    }
    case AMDGPUIntrinsic::AMDGPU_dp4: {
      // Expand dp4 into the DOT4 node over the interleaved scalar channels
      // of both vector operands.
      SDValue Args[8] = {
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(0, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(0, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(1, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(1, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(2, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(2, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(3, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(3, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8);
    }

    // Work-group / global-size queries are loads from the implicit
    // parameter buffer at fixed dword offsets.
    case r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    // Group and thread ids live in fixed hardware registers T1 / T0.
    case r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}
704
/// \brief Provide legal replacement values for nodes with illegal result
/// types (called during type legalization).
void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default: return;
  case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
    return;
  case ISD::LOAD: {
    SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
    // Push both the value and the chain results of the lowered load.
    Results.push_back(SDValue(Node, 0));
    Results.push_back(SDValue(Node, 1));
    // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
    // function
    DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
    return;
  }
  case ISD::STORE:
    SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
    Results.push_back(SDValue(Node, 0));
    return;
  }
}
727
728SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
729 return DAG.getNode(
730 ISD::SETCC,
731 Op.getDebugLoc(),
732 MVT::i1,
733 Op, DAG.getConstantFP(0.0f, MVT::f32),
734 DAG.getCondCode(ISD::SETNE)
735 );
736}
737
Tom Stellard75aadc22012-12-11 21:25:42 +0000738SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
739 DebugLoc DL,
740 unsigned DwordOffset) const {
741 unsigned ByteOffset = DwordOffset * 4;
742 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
743 AMDGPUAS::PARAM_I_ADDRESS);
744
745 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
746 assert(isInt<16>(ByteOffset));
747
748 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
749 DAG.getConstant(ByteOffset, MVT::i32), // PTR
750 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
751 false, false, false, 0);
752}
753
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000754SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
755
756 MachineFunction &MF = DAG.getMachineFunction();
757 const AMDGPUFrameLowering *TFL =
758 static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
759
760 FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
761 assert(FIN);
762
763 unsigned FrameIndex = FIN->getIndex();
764 unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
765 return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
766}
767
Tom Stellard75aadc22012-12-11 21:25:42 +0000768SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
769 DebugLoc DL = Op.getDebugLoc();
770 EVT VT = Op.getValueType();
771
772 return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
773 Op.getOperand(0),
774 Op.getOperand(0),
775 DAG.getNode(ISD::SUB, DL, VT,
776 DAG.getConstant(32, MVT::i32),
777 Op.getOperand(1)));
778}
779
780bool R600TargetLowering::isZero(SDValue Op) const {
781 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
782 return Cst->isNullValue();
783 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
784 return CstFP->isZero();
785 } else {
786 return false;
787 }
788}
789
790SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
791 DebugLoc DL = Op.getDebugLoc();
792 EVT VT = Op.getValueType();
793
794 SDValue LHS = Op.getOperand(0);
795 SDValue RHS = Op.getOperand(1);
796 SDValue True = Op.getOperand(2);
797 SDValue False = Op.getOperand(3);
798 SDValue CC = Op.getOperand(4);
799 SDValue Temp;
800
801 // LHS and RHS are guaranteed to be the same value type
802 EVT CompareVT = LHS.getValueType();
803
804 // Check if we can lower this to a native operation.
805
Tom Stellard2add82d2013-03-08 15:37:09 +0000806 // Try to lower to a SET* instruction:
807 //
808 // SET* can match the following patterns:
809 //
810 // select_cc f32, f32, -1, 0, cc_any
811 // select_cc f32, f32, 1.0f, 0.0f, cc_any
812 // select_cc i32, i32, -1, 0, cc_any
813 //
814
815 // Move hardware True/False values to the correct operand.
816 if (isHWTrueValue(False) && isHWFalseValue(True)) {
817 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
818 std::swap(False, True);
819 CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
820 }
821
822 if (isHWTrueValue(True) && isHWFalseValue(False) &&
823 (CompareVT == VT || VT == MVT::i32)) {
824 // This can be matched by a SET* instruction.
825 return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
826 }
827
Tom Stellard75aadc22012-12-11 21:25:42 +0000828 // Try to lower to a CND* instruction:
Tom Stellard2add82d2013-03-08 15:37:09 +0000829 //
830 // CND* can match the following patterns:
831 //
832 // select_cc f32, 0.0, f32, f32, cc_any
833 // select_cc f32, 0.0, i32, i32, cc_any
834 // select_cc i32, 0, f32, f32, cc_any
835 // select_cc i32, 0, i32, i32, cc_any
836 //
Tom Stellard75aadc22012-12-11 21:25:42 +0000837 if (isZero(LHS) || isZero(RHS)) {
838 SDValue Cond = (isZero(LHS) ? RHS : LHS);
839 SDValue Zero = (isZero(LHS) ? LHS : RHS);
840 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
841 if (CompareVT != VT) {
842 // Bitcast True / False to the correct types. This will end up being
843 // a nop, but it allows us to define only a single pattern in the
844 // .TD files for each CND* instruction rather than having to have
845 // one pattern for integer True/False and one for fp True/False
846 True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
847 False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
848 }
849 if (isZero(LHS)) {
850 CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
851 }
852
853 switch (CCOpcode) {
854 case ISD::SETONE:
855 case ISD::SETUNE:
856 case ISD::SETNE:
857 case ISD::SETULE:
858 case ISD::SETULT:
859 case ISD::SETOLE:
860 case ISD::SETOLT:
861 case ISD::SETLE:
862 case ISD::SETLT:
863 CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
864 Temp = True;
865 True = False;
866 False = Temp;
867 break;
868 default:
869 break;
870 }
871 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
872 Cond, Zero,
873 True, False,
874 DAG.getCondCode(CCOpcode));
875 return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
876 }
877
Tom Stellard75aadc22012-12-11 21:25:42 +0000878
879 // Possible Min/Max pattern
880 SDValue MinMax = LowerMinMax(Op, DAG);
881 if (MinMax.getNode()) {
882 return MinMax;
883 }
884
885 // If we make it this for it means we have no native instructions to handle
886 // this SELECT_CC, so we must lower it.
887 SDValue HWTrue, HWFalse;
888
889 if (CompareVT == MVT::f32) {
890 HWTrue = DAG.getConstantFP(1.0f, CompareVT);
891 HWFalse = DAG.getConstantFP(0.0f, CompareVT);
892 } else if (CompareVT == MVT::i32) {
893 HWTrue = DAG.getConstant(-1, CompareVT);
894 HWFalse = DAG.getConstant(0, CompareVT);
895 }
896 else {
897 assert(!"Unhandled value type in LowerSELECT_CC");
898 }
899
900 // Lower this unsupported SELECT_CC into a combination of two supported
901 // SELECT_CC operations.
902 SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
903
904 return DAG.getNode(ISD::SELECT_CC, DL, VT,
905 Cond, HWFalse,
906 True, False,
907 DAG.getCondCode(ISD::SETNE));
908}
909
910SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
911 return DAG.getNode(ISD::SELECT_CC,
912 Op.getDebugLoc(),
913 Op.getValueType(),
914 Op.getOperand(0),
915 DAG.getConstant(0, MVT::i32),
916 Op.getOperand(1),
917 Op.getOperand(2),
918 DAG.getCondCode(ISD::SETNE));
919}
920
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000921/// LLVM generates byte-addresed pointers. For indirect addressing, we need to
922/// convert these pointers to a register index. Each register holds
923/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
924/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
925/// for indirect addressing.
926SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
927 unsigned StackWidth,
928 SelectionDAG &DAG) const {
929 unsigned SRLPad;
930 switch(StackWidth) {
931 case 1:
932 SRLPad = 2;
933 break;
934 case 2:
935 SRLPad = 3;
936 break;
937 case 4:
938 SRLPad = 4;
939 break;
940 default: llvm_unreachable("Invalid stack width");
941 }
942
943 return DAG.getNode(ISD::SRL, Ptr.getDebugLoc(), Ptr.getValueType(), Ptr,
944 DAG.getConstant(SRLPad, MVT::i32));
945}
946
947void R600TargetLowering::getStackAddress(unsigned StackWidth,
948 unsigned ElemIdx,
949 unsigned &Channel,
950 unsigned &PtrIncr) const {
951 switch (StackWidth) {
952 default:
953 case 1:
954 Channel = 0;
955 if (ElemIdx > 0) {
956 PtrIncr = 1;
957 } else {
958 PtrIncr = 0;
959 }
960 break;
961 case 2:
962 Channel = ElemIdx % 2;
963 if (ElemIdx == 2) {
964 PtrIncr = 1;
965 } else {
966 PtrIncr = 0;
967 }
968 break;
969 case 4:
970 Channel = ElemIdx;
971 PtrIncr = 0;
972 break;
973 }
974}
975
Tom Stellard75aadc22012-12-11 21:25:42 +0000976SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
977 DebugLoc DL = Op.getDebugLoc();
978 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
979 SDValue Chain = Op.getOperand(0);
980 SDValue Value = Op.getOperand(1);
981 SDValue Ptr = Op.getOperand(2);
982
983 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
984 Ptr->getOpcode() != AMDGPUISD::DWORDADDR) {
985 // Convert pointer from byte address to dword address.
986 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
987 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
988 Ptr, DAG.getConstant(2, MVT::i32)));
989
990 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
991 assert(!"Truncated and indexed stores not supported yet");
992 } else {
993 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
994 }
995 return Chain;
996 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000997
998 EVT ValueVT = Value.getValueType();
999
1000 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1001 return SDValue();
1002 }
1003
1004 // Lowering for indirect addressing
1005
1006 const MachineFunction &MF = DAG.getMachineFunction();
1007 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1008 getTargetMachine().getFrameLowering());
1009 unsigned StackWidth = TFL->getStackWidth(MF);
1010
1011 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1012
1013 if (ValueVT.isVector()) {
1014 unsigned NumElemVT = ValueVT.getVectorNumElements();
1015 EVT ElemVT = ValueVT.getVectorElementType();
1016 SDValue Stores[4];
1017
1018 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1019 "vector width in load");
1020
1021 for (unsigned i = 0; i < NumElemVT; ++i) {
1022 unsigned Channel, PtrIncr;
1023 getStackAddress(StackWidth, i, Channel, PtrIncr);
1024 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1025 DAG.getConstant(PtrIncr, MVT::i32));
1026 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
1027 Value, DAG.getConstant(i, MVT::i32));
1028
1029 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1030 Chain, Elem, Ptr,
1031 DAG.getTargetConstant(Channel, MVT::i32));
1032 }
1033 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
1034 } else {
1035 if (ValueVT == MVT::i8) {
1036 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1037 }
1038 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
1039 DAG.getTargetConstant(0, MVT::i32)); // Channel
1040 }
1041
1042 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001043}
1044
Tom Stellard365366f2013-01-23 02:09:06 +00001045// return (512 + (kc_bank << 12)
1046static int
1047ConstantAddressBlock(unsigned AddressSpace) {
1048 switch (AddressSpace) {
1049 case AMDGPUAS::CONSTANT_BUFFER_0:
1050 return 512;
1051 case AMDGPUAS::CONSTANT_BUFFER_1:
1052 return 512 + 4096;
1053 case AMDGPUAS::CONSTANT_BUFFER_2:
1054 return 512 + 4096 * 2;
1055 case AMDGPUAS::CONSTANT_BUFFER_3:
1056 return 512 + 4096 * 3;
1057 case AMDGPUAS::CONSTANT_BUFFER_4:
1058 return 512 + 4096 * 4;
1059 case AMDGPUAS::CONSTANT_BUFFER_5:
1060 return 512 + 4096 * 5;
1061 case AMDGPUAS::CONSTANT_BUFFER_6:
1062 return 512 + 4096 * 6;
1063 case AMDGPUAS::CONSTANT_BUFFER_7:
1064 return 512 + 4096 * 7;
1065 case AMDGPUAS::CONSTANT_BUFFER_8:
1066 return 512 + 4096 * 8;
1067 case AMDGPUAS::CONSTANT_BUFFER_9:
1068 return 512 + 4096 * 9;
1069 case AMDGPUAS::CONSTANT_BUFFER_10:
1070 return 512 + 4096 * 10;
1071 case AMDGPUAS::CONSTANT_BUFFER_11:
1072 return 512 + 4096 * 11;
1073 case AMDGPUAS::CONSTANT_BUFFER_12:
1074 return 512 + 4096 * 12;
1075 case AMDGPUAS::CONSTANT_BUFFER_13:
1076 return 512 + 4096 * 13;
1077 case AMDGPUAS::CONSTANT_BUFFER_14:
1078 return 512 + 4096 * 14;
1079 case AMDGPUAS::CONSTANT_BUFFER_15:
1080 return 512 + 4096 * 15;
1081 default:
1082 return -1;
1083 }
1084}
1085
1086SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
1087{
1088 EVT VT = Op.getValueType();
1089 DebugLoc DL = Op.getDebugLoc();
1090 LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
1091 SDValue Chain = Op.getOperand(0);
1092 SDValue Ptr = Op.getOperand(1);
1093 SDValue LoweredLoad;
1094
1095 int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
1096 if (ConstantBlock > -1) {
1097 SDValue Result;
1098 if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
Vincent Lejeune743dca02013-03-05 15:04:29 +00001099 dyn_cast<Constant>(LoadNode->getSrcValue()) ||
1100 dyn_cast<ConstantSDNode>(Ptr)) {
Tom Stellard365366f2013-01-23 02:09:06 +00001101 SDValue Slots[4];
1102 for (unsigned i = 0; i < 4; i++) {
1103 // We want Const position encoded with the following formula :
1104 // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
1105 // const_index is Ptr computed by llvm using an alignment of 16.
1106 // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
1107 // then div by 4 at the ISel step
1108 SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
1109 DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
1110 Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
1111 }
1112 Result = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Slots, 4);
1113 } else {
1114 // non constant ptr cant be folded, keeps it as a v4f32 load
1115 Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
Vincent Lejeune743dca02013-03-05 15:04:29 +00001116 DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
Christian Konig189357c2013-03-07 09:03:59 +00001117 DAG.getConstant(LoadNode->getAddressSpace() -
1118 AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
Tom Stellard365366f2013-01-23 02:09:06 +00001119 );
1120 }
1121
1122 if (!VT.isVector()) {
1123 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
1124 DAG.getConstant(0, MVT::i32));
1125 }
1126
1127 SDValue MergedValues[2] = {
1128 Result,
1129 Chain
1130 };
1131 return DAG.getMergeValues(MergedValues, 2, DL);
1132 }
1133
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001134 if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1135 return SDValue();
1136 }
1137
1138 // Lowering for indirect addressing
1139 const MachineFunction &MF = DAG.getMachineFunction();
1140 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1141 getTargetMachine().getFrameLowering());
1142 unsigned StackWidth = TFL->getStackWidth(MF);
1143
1144 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1145
1146 if (VT.isVector()) {
1147 unsigned NumElemVT = VT.getVectorNumElements();
1148 EVT ElemVT = VT.getVectorElementType();
1149 SDValue Loads[4];
1150
1151 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1152 "vector width in load");
1153
1154 for (unsigned i = 0; i < NumElemVT; ++i) {
1155 unsigned Channel, PtrIncr;
1156 getStackAddress(StackWidth, i, Channel, PtrIncr);
1157 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1158 DAG.getConstant(PtrIncr, MVT::i32));
1159 Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
1160 Chain, Ptr,
1161 DAG.getTargetConstant(Channel, MVT::i32),
1162 Op.getOperand(2));
1163 }
1164 for (unsigned i = NumElemVT; i < 4; ++i) {
1165 Loads[i] = DAG.getUNDEF(ElemVT);
1166 }
1167 EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
1168 LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
1169 } else {
1170 LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
1171 Chain, Ptr,
1172 DAG.getTargetConstant(0, MVT::i32), // Channel
1173 Op.getOperand(2));
1174 }
1175
1176 SDValue Ops[2];
1177 Ops[0] = LoweredLoad;
1178 Ops[1] = Chain;
1179
1180 return DAG.getMergeValues(Ops, 2, DL);
Tom Stellard365366f2013-01-23 02:09:06 +00001181}
Tom Stellard75aadc22012-12-11 21:25:42 +00001182
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
///
/// Each formal argument is materialized as a zero-extending load from the
/// PARAM_I address space, packed back-to-back starting at byte offset 36.
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      DebugLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  // First user argument lives at byte 36 of the parameter buffer.
  // NOTE(review): presumably the first 36 bytes carry dispatch/launch data
  // consumed elsewhere — confirm against the runtime ABI.
  unsigned ParamOffsetBytes = 36;
  Function::const_arg_iterator FuncArg =
                            DAG.getMachineFunction().getFunction()->arg_begin();
  for (unsigned i = 0, e = Ins.size(); i < e; ++i, ++FuncArg) {
    EVT VT = Ins[i].VT;
    Type *ArgType = FuncArg->getType();
    // IR-level size of the argument; pointers are treated as 32 bits.
    unsigned ArgSizeInBits = ArgType->isPointerTy() ?
                             32 : ArgType->getPrimitiveSizeInBits();
    unsigned ArgBytes = ArgSizeInBits >> 3;
    EVT ArgVT;
    if (ArgSizeInBits < VT.getSizeInBits()) {
      // The register type is wider than the IR argument: load only the
      // narrow integer type and let the ZEXTLOAD below widen it.
      assert(!ArgType->isFloatTy() &&
             "Extending floating point arguments not supported yet");
      ArgVT = MVT::getIntegerVT(ArgSizeInBits);
    } else {
      ArgVT = VT;
    }
    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::PARAM_I_ADDRESS);
    // The load hangs off DAG.getRoot() (not Chain) and uses an undef base
    // pointer with a constant byte offset into the parameter space.
    SDValue Arg = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getRoot(),
                                 DAG.getConstant(ParamOffsetBytes, MVT::i32),
                                 MachinePointerInfo(UndefValue::get(PtrTy)),
                                 ArgVT, false, false, ArgBytes);
    InVals.push_back(Arg);
    // Arguments are packed with no padding between them.
    ParamOffsetBytes += ArgBytes;
  }
  return Chain;
}
1221
Matt Arsenault758659232013-05-18 00:21:46 +00001222EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Tom Stellard75aadc22012-12-11 21:25:42 +00001223 if (!VT.isVector()) return MVT::i32;
1224 return VT.changeVectorElementTypeToInteger();
1225}
1226
1227//===----------------------------------------------------------------------===//
1228// Custom DAG Optimizations
1229//===----------------------------------------------------------------------===//
1230
1231SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1232 DAGCombinerInfo &DCI) const {
1233 SelectionDAG &DAG = DCI.DAG;
1234
1235 switch (N->getOpcode()) {
1236 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1237 case ISD::FP_ROUND: {
1238 SDValue Arg = N->getOperand(0);
1239 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
1240 return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), N->getValueType(0),
1241 Arg.getOperand(0));
1242 }
1243 break;
1244 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001245
1246 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1247 // (i32 select_cc f32, f32, -1, 0 cc)
1248 //
1249 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1250 // this to one of the SET*_DX10 instructions.
1251 case ISD::FP_TO_SINT: {
1252 SDValue FNeg = N->getOperand(0);
1253 if (FNeg.getOpcode() != ISD::FNEG) {
1254 return SDValue();
1255 }
1256 SDValue SelectCC = FNeg.getOperand(0);
1257 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1258 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1259 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1260 !isHWTrueValue(SelectCC.getOperand(2)) ||
1261 !isHWFalseValue(SelectCC.getOperand(3))) {
1262 return SDValue();
1263 }
1264
1265 return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), N->getValueType(0),
1266 SelectCC.getOperand(0), // LHS
1267 SelectCC.getOperand(1), // RHS
1268 DAG.getConstant(-1, MVT::i32), // True
1269 DAG.getConstant(0, MVT::i32), // Flase
1270 SelectCC.getOperand(4)); // CC
1271
1272 break;
1273 }
Tom Stellard365366f2013-01-23 02:09:06 +00001274 // Extract_vec (Build_vector) generated by custom lowering
1275 // also needs to be customly combined
1276 case ISD::EXTRACT_VECTOR_ELT: {
1277 SDValue Arg = N->getOperand(0);
1278 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1279 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1280 unsigned Element = Const->getZExtValue();
1281 return Arg->getOperand(Element);
1282 }
1283 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001284 if (Arg.getOpcode() == ISD::BITCAST &&
1285 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1286 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1287 unsigned Element = Const->getZExtValue();
1288 return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getVTList(),
1289 Arg->getOperand(0).getOperand(Element));
1290 }
1291 }
Tom Stellard365366f2013-01-23 02:09:06 +00001292 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001293
1294 case ISD::SELECT_CC: {
1295 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1296 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001297 //
1298 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1299 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001300 SDValue LHS = N->getOperand(0);
1301 if (LHS.getOpcode() != ISD::SELECT_CC) {
1302 return SDValue();
1303 }
1304
1305 SDValue RHS = N->getOperand(1);
1306 SDValue True = N->getOperand(2);
1307 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001308 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001309
1310 if (LHS.getOperand(2).getNode() != True.getNode() ||
1311 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00001312 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00001313 return SDValue();
1314 }
1315
Tom Stellard5e524892013-03-08 15:37:11 +00001316 switch (NCC) {
1317 default: return SDValue();
1318 case ISD::SETNE: return LHS;
1319 case ISD::SETEQ: {
1320 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1321 LHSCC = ISD::getSetCCInverse(LHSCC,
1322 LHS.getOperand(0).getValueType().isInteger());
1323 return DAG.getSelectCC(N->getDebugLoc(),
1324 LHS.getOperand(0),
1325 LHS.getOperand(1),
1326 LHS.getOperand(2),
1327 LHS.getOperand(3),
1328 LHSCC);
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001329 }
Tom Stellard5e524892013-03-08 15:37:11 +00001330 }
1331 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001332 case AMDGPUISD::EXPORT: {
1333 SDValue Arg = N->getOperand(1);
1334 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1335 break;
1336 SDValue NewBldVec[4] = {
1337 DAG.getUNDEF(MVT::f32),
1338 DAG.getUNDEF(MVT::f32),
1339 DAG.getUNDEF(MVT::f32),
1340 DAG.getUNDEF(MVT::f32)
1341 };
1342 SDValue NewArgs[8] = {
1343 N->getOperand(0), // Chain
1344 SDValue(),
1345 N->getOperand(2), // ArrayBase
1346 N->getOperand(3), // Type
1347 N->getOperand(4), // SWZ_X
1348 N->getOperand(5), // SWZ_Y
1349 N->getOperand(6), // SWZ_Z
1350 N->getOperand(7) // SWZ_W
1351 };
1352 for (unsigned i = 0; i < Arg.getNumOperands(); i++) {
1353 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Arg.getOperand(i))) {
1354 if (C->isZero()) {
1355 NewArgs[4 + i] = DAG.getConstant(4, MVT::i32); // SEL_0
1356 } else if (C->isExactlyValue(1.0)) {
1357 NewArgs[4 + i] = DAG.getConstant(5, MVT::i32); // SEL_0
1358 } else {
1359 NewBldVec[i] = Arg.getOperand(i);
1360 }
1361 } else {
1362 NewBldVec[i] = Arg.getOperand(i);
1363 }
1364 }
1365 DebugLoc DL = N->getDebugLoc();
1366 NewArgs[1] = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4f32, NewBldVec, 4);
1367 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
Tom Stellarde06163a2013-02-07 14:02:35 +00001368 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001369 }
1370 return SDValue();
1371}